Skip to content

Commit

Permalink
Merge branch 'main' into onnxembeddinggeneration
Browse files Browse the repository at this point in the history
  • Loading branch information
stephentoub committed Mar 20, 2024
2 parents 0d9c8aa + 51ee30f commit 5198d23
Show file tree
Hide file tree
Showing 31 changed files with 853 additions and 324 deletions.
2 changes: 1 addition & 1 deletion docs/decisions/0015-completion-service-selection.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
# These are optional elements. Feel free to remove any of them.
status: accepted
status: superseded by [ADR-0038](0038-completion-service-selection.md)
contact: SergeyMenshykh
date: 2023-10-25
deciders: markwallace-microsoft, matthewbolanos
Expand Down
28 changes: 28 additions & 0 deletions docs/decisions/0038-completion-service-selection.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
# These are optional elements. Feel free to remove any of them.
status: accepted
contact: markwallace-microsoft
date: 2024-03-14
deciders: sergeymenshykh, markwallace, rbarreto, dmytrostruk
consulted:
informed:
---

# Completion Service Selection Strategy

## Context and Problem Statement

Today, SK uses the current `IAIServiceSelector` implementation to determine which type of service is used when running a text prompt.
The `IAIServiceSelector` implementation will return either a chat completion service, a text generation service, or a service that implements both.
The prompt will be run using chat completion by default and will fall back to text generation as the alternative option.

This behavior supersedes the description in [ADR-0015](0015-completion-service-selection.md).

## Decision Drivers

- Chat completion services are becoming dominant in the industry, e.g. OpenAI has deprecated most of its text generation services.
- Chat completion generally provides better responses and the ability to use advanced features e.g. tool calling.

## Decision Outcome

Chosen option: Keep the current behavior as described above.
4 changes: 2 additions & 2 deletions dotnet/nuget/nuget-package.props
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<Project>
<PropertyGroup>
<!-- Central version prefix - applies to all nuget packages. -->
<VersionPrefix>1.6.2</VersionPrefix>
<VersionPrefix>1.6.3</VersionPrefix>

<PackageVersion Condition="'$(VersionSuffix)' != ''">$(VersionPrefix)-$(VersionSuffix)</PackageVersion>
<PackageVersion Condition="'$(VersionSuffix)' == ''">$(VersionPrefix)</PackageVersion>
Expand All @@ -10,7 +10,7 @@
<IsPackable>true</IsPackable>

<!-- Package validation. Baseline Version should be lower than current version. -->
<PackageValidationBaselineVersion>1.6.2</PackageValidationBaselineVersion>
<PackageValidationBaselineVersion>1.6.3</PackageValidationBaselineVersion>
<!-- Validate assembly attributes only for Publish builds -->
<NoWarn Condition="'$(Configuration)' != 'Publish'">$(NoWarn);CP0003</NoWarn>
<!-- Do not validate reference assemblies -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ private static Dictionary<string, string> SampleData()
= "Jupyter notebook describing how to get started with the Semantic Kernel",
["https://github.com/microsoft/semantic-kernel/tree/main/samples/plugins/ChatPlugin/ChatGPT"]
= "Sample demonstrating how to create a chat plugin interfacing with ChatGPT",
["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/Plugins/Plugins.Memory/VolatileMemoryStore.cs"]
= "C# class that defines a volatile embedding store",
};
}
Expand Down
104 changes: 66 additions & 38 deletions dotnet/samples/KernelSyntaxExamples/Example55_TextChunker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,75 +84,103 @@ public enum TokenCounterType
/// Custom token counter implementation using SharpToken.
/// Note: SharpToken is used for demonstration purposes only, it's possible to use any available or custom tokenization logic.
/// </summary>
private static TokenCounter SharpTokenTokenCounter => (string input) =>
public class SharpTokenTokenCounter
{
// Initialize encoding by encoding name
var encoding = GptEncoding.GetEncoding("cl100k_base");
private readonly GptEncoding _encoding;

// Initialize encoding by model name
// var encoding = GptEncoding.GetEncodingForModel("gpt-4");
public SharpTokenTokenCounter()
{
this._encoding = GptEncoding.GetEncoding("cl100k_base");
// Initialize encoding by model name
// this._encoding = GptEncoding.GetEncodingForModel("gpt-4");
}

var tokens = encoding.Encode(input);
public int Count(string input)
{
var tokens = this._encoding.Encode(input);

return tokens.Count;
};
return tokens.Count;
}
}

/// <summary>
/// MicrosoftML token counter implementation.
/// </summary>
private static TokenCounter MicrosoftMLTokenCounter => (string input) =>
public class MicrosoftMLTokenCounter
{
Tokenizer tokenizer = new(new Bpe());
var tokens = tokenizer.Encode(input).Tokens;
private readonly Tokenizer _tokenizer;

return tokens.Count;
};
public MicrosoftMLTokenCounter()
{
this._tokenizer = new(new Bpe());
}

public int Count(string input)
{
var tokens = this._tokenizer.Encode(input).Tokens;

return tokens.Count;
}
}

/// <summary>
/// MicrosoftML token counter implementation using Roberta and local vocab
/// </summary>
private static TokenCounter MicrosoftMLRobertaTokenCounter => (string input) =>
public class MicrosoftMLRobertaTokenCounter
{
var encoder = EmbeddedResource.ReadStream("EnglishRoberta.encoder.json");
var vocab = EmbeddedResource.ReadStream("EnglishRoberta.vocab.bpe");
var dict = EmbeddedResource.ReadStream("EnglishRoberta.dict.txt");
private readonly Tokenizer _tokenizer;

if (encoder is null || vocab is null || dict is null)
public MicrosoftMLRobertaTokenCounter()
{
throw new FileNotFoundException("Missing required resources");
}
var encoder = EmbeddedResource.ReadStream("EnglishRoberta.encoder.json");
var vocab = EmbeddedResource.ReadStream("EnglishRoberta.vocab.bpe");
var dict = EmbeddedResource.ReadStream("EnglishRoberta.dict.txt");

EnglishRoberta model = new(encoder, vocab, dict);
if (encoder is null || vocab is null || dict is null)
{
throw new FileNotFoundException("Missing required resources");
}

model.AddMaskSymbol(); // Not sure what this does, but it's in the example
Tokenizer tokenizer = new(model, new RobertaPreTokenizer());
var tokens = tokenizer.Encode(input).Tokens;
EnglishRoberta model = new(encoder, vocab, dict);

return tokens.Count;
};
model.AddMaskSymbol(); // Not sure what this does, but it's in the example
this._tokenizer = new(model, new RobertaPreTokenizer());
}

public int Count(string input)
{
var tokens = this._tokenizer.Encode(input).Tokens;

return tokens.Count;
}
}

/// <summary>
/// DeepDev token counter implementation.
/// </summary>
private static TokenCounter DeepDevTokenCounter => (string input) =>
public class DeepDevTokenCounter
{
// Initialize encoding by encoding name
var tokenizer = TokenizerBuilder.CreateByEncoderNameAsync("cl100k_base").GetAwaiter().GetResult();
private readonly ITokenizer _tokenizer;

// Initialize encoding by model name
// var tokenizer = TokenizerBuilder.CreateByModelNameAsync("gpt-4").GetAwaiter().GetResult();
public DeepDevTokenCounter()
{
this._tokenizer = TokenizerBuilder.CreateByEncoderNameAsync("cl100k_base").GetAwaiter().GetResult();
}

var tokens = tokenizer.Encode(input, new HashSet<string>());
return tokens.Count;
};
public int Count(string input)
{
var tokens = this._tokenizer.Encode(input, new HashSet<string>());
return tokens.Count;
}
}

private static readonly Func<TokenCounterType, TokenCounter> s_tokenCounterFactory = (TokenCounterType counterType) =>
counterType switch
{
TokenCounterType.SharpToken => (string input) => SharpTokenTokenCounter(input),
TokenCounterType.MicrosoftML => (string input) => MicrosoftMLTokenCounter(input),
TokenCounterType.DeepDev => (string input) => DeepDevTokenCounter(input),
TokenCounterType.MicrosoftMLRoberta => (string input) => MicrosoftMLRobertaTokenCounter(input),
TokenCounterType.SharpToken => new SharpTokenTokenCounter().Count,
TokenCounterType.MicrosoftML => new MicrosoftMLTokenCounter().Count,
TokenCounterType.DeepDev => new DeepDevTokenCounter().Count,
TokenCounterType.MicrosoftMLRoberta => new MicrosoftMLRobertaTokenCounter().Count,
_ => throw new ArgumentOutOfRangeException(nameof(counterType), counterType, null),
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,7 @@ internal static OpenAIClientOptions GetOpenAIClientOptions(HttpClient? httpClien
{
options.Transport = new HttpClientTransport(httpClient);
options.RetryPolicy = new RetryPolicy(maxRetries: 0); // Disable Azure SDK retry policy if and only if a custom HttpClient is provided.
options.Retry.NetworkTimeout = Timeout.InfiniteTimeSpan; // Disable Azure SDK default timeout
}

return options;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public class AzureOpenAIChatCompletionWithDataConfig
public string CompletionApiKey { get; set; } = string.Empty;

/// <summary>
/// Azure OpenAI Completion API version (e.g. 2023-06-01-preview)
/// Azure OpenAI Completion API version (e.g. 2024-02-01)
/// </summary>
public string CompletionApiVersion { get; set; } = string.Empty;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ public async IAsyncEnumerable<StreamingTextContent> GetStreamingTextContentsAsyn

#region private ================================================================================

private const string DefaultApiVersion = "2023-06-01-preview";
private const string DefaultApiVersion = "2024-02-01";

private readonly AzureOpenAIChatCompletionWithDataConfig _config;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ private static OpenAIClientOptions GetClientOptions(HttpClient? httpClient, stri
{
OpenAIClientOptions.ServiceVersion version = apiVersion switch
{
// DALL-E 3 is only supported post 2023-12-01-preview
// DALL-E 3 is supported in the latest API releases
_ => OpenAIClientOptions.ServiceVersion.V2024_02_15_Preview
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public async Task DefaultApiVersionShouldBeUsedAsync()
// Assert
var actualUri = this._messageHandlerStub.RequestUri?.AbsoluteUri;

Assert.Contains("2023-06-01-preview", actualUri, StringComparison.OrdinalIgnoreCase);
Assert.Contains("2024-02-01", actualUri, StringComparison.OrdinalIgnoreCase);
}

[Fact]
Expand Down
18 changes: 18 additions & 0 deletions dotnet/src/SemanticKernel.Core/CompatibilitySuppressions.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- https://learn.microsoft.com/en-us/dotnet/fundamentals/package-validation/diagnostic-ids -->
<Suppressions xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Text.TextChunker.SplitMarkdownParagraphs(System.Collections.Generic.List{System.String},System.Int32,System.Int32,System.String,Microsoft.SemanticKernel.Text.TextChunker.TokenCounter)</Target>
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Left>
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
<Suppression>
<DiagnosticId>CP0002</DiagnosticId>
<Target>M:Microsoft.SemanticKernel.Text.TextChunker.SplitPlainTextParagraphs(System.Collections.Generic.List{System.String},System.Int32,System.Int32,System.String,Microsoft.SemanticKernel.Text.TextChunker.TokenCounter)</Target>
<Left>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Left>
<Right>lib/netstandard2.0/Microsoft.SemanticKernel.Core.dll</Right>
<IsBaselineSuppression>true</IsBaselineSuppression>
</Suppression>
</Suppressions>
19 changes: 5 additions & 14 deletions dotnet/src/SemanticKernel.Core/Text/TextChunker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ public static class TextChunker
/// <param name="maxTokensPerLine">Maximum number of tokens per line.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of lines.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null) =>
InternalSplitLines(text, maxTokensPerLine, trim: true, s_plaintextSplitOptions, tokenCounter);

Expand All @@ -46,7 +45,6 @@ public static List<string> SplitPlainTextLines(string text, int maxTokensPerLine
/// <param name="maxTokensPerLine">Maximum number of tokens per line.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of lines.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitMarkDownLines(string text, int maxTokensPerLine, TokenCounter? tokenCounter = null) =>
InternalSplitLines(text, maxTokensPerLine, trim: true, s_markdownSplitOptions, tokenCounter);

Expand All @@ -59,8 +57,7 @@ public static List<string> SplitMarkDownLines(string text, int maxTokensPerLine,
/// <param name="chunkHeader">Text to be prepended to each individual chunk.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of paragraphs.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitPlainTextParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
public static List<string> SplitPlainTextParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
InternalSplitTextParagraphs(lines, maxTokensPerParagraph, overlapTokens, chunkHeader, static (text, maxTokens, tokenCounter) => InternalSplitLines(text, maxTokens, trim: false, s_plaintextSplitOptions, tokenCounter), tokenCounter);

/// <summary>
Expand All @@ -72,12 +69,10 @@ public static List<string> SplitPlainTextParagraphs(List<string> lines, int maxT
/// <param name="chunkHeader">Text to be prepended to each individual chunk.</param>
/// <param name="tokenCounter">Function to count tokens in a string. If not supplied, the default counter will be used.</param>
/// <returns>List of paragraphs.</returns>
[Experimental("SKEXP0050")]
public static List<string> SplitMarkdownParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
public static List<string> SplitMarkdownParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens = 0, string? chunkHeader = null, TokenCounter? tokenCounter = null) =>
InternalSplitTextParagraphs(lines, maxTokensPerParagraph, overlapTokens, chunkHeader, static (text, maxTokens, tokenCounter) => InternalSplitLines(text, maxTokens, trim: false, s_markdownSplitOptions, tokenCounter), tokenCounter);

[Experimental("SKEXP0050")]
private static List<string> InternalSplitTextParagraphs(List<string> lines, int maxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
private static List<string> InternalSplitTextParagraphs(IEnumerable<string> lines, int maxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
{
if (maxTokensPerParagraph <= 0)
{
Expand All @@ -89,7 +84,8 @@ private static List<string> InternalSplitTextParagraphs(List<string> lines, int
throw new ArgumentException("overlapTokens cannot be larger than maxTokensPerParagraph", nameof(maxTokensPerParagraph));
}

if (lines.Count == 0)
// Optimize empty inputs if we can efficiently determine they're empty
if (lines is ICollection<string> c && c.Count == 0)
{
return new List<string>();
}
Expand All @@ -106,7 +102,6 @@ private static List<string> InternalSplitTextParagraphs(List<string> lines, int
return processedParagraphs;
}

[Experimental("SKEXP0050")]
private static List<string> BuildParagraph(IEnumerable<string> truncatedLines, int maxTokensPerParagraph, TokenCounter? tokenCounter)
{
StringBuilder paragraphBuilder = new();
Expand Down Expand Up @@ -147,7 +142,6 @@ private static List<string> BuildParagraph(IEnumerable<string> truncatedLines, i
return paragraphs;
}

[Experimental("SKEXP0050")]
private static List<string> ProcessParagraphs(List<string> paragraphs, int adjustedMaxTokensPerParagraph, int overlapTokens, string? chunkHeader, Func<string, int, TokenCounter?, List<string>> longLinesSplitter, TokenCounter? tokenCounter)
{
// distribute text more evenly in the last paragraphs when the last paragraph is too short.
Expand Down Expand Up @@ -212,7 +206,6 @@ private static List<string> ProcessParagraphs(List<string> paragraphs, int adjus
return processedParagraphs;
}

[Experimental("SKEXP0050")]
private static List<string> InternalSplitLines(string text, int maxTokensPerLine, bool trim, string?[] splitOptions, TokenCounter? tokenCounter)
{
var result = new List<string>();
Expand All @@ -233,7 +226,6 @@ private static List<string> InternalSplitLines(string text, int maxTokensPerLine
return result;
}

[Experimental("SKEXP0050")]
private static (List<string>, bool) Split(List<string> input, int maxTokens, ReadOnlySpan<char> separators, bool trim, TokenCounter? tokenCounter)
{
bool inputWasSplit = false;
Expand All @@ -248,7 +240,6 @@ private static (List<string>, bool) Split(List<string> input, int maxTokens, Rea
return (result, inputWasSplit);
}

[Experimental("SKEXP0050")]
private static (List<string>, bool) Split(ReadOnlySpan<char> input, string? inputString, int maxTokens, ReadOnlySpan<char> separators, bool trim, TokenCounter? tokenCounter)
{
Debug.Assert(inputString is null || input.SequenceEqual(inputString.AsSpan()));
Expand Down
20 changes: 19 additions & 1 deletion python/.conf/.pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
files: ^python/
fail_fast: true
repos:
- repo: https://github.com/floatingpurr/sync_with_poetry
rev: 1.1.0
Expand All @@ -24,4 +26,20 @@ repos:
rev: v0.3.2
hooks:
- id: ruff
args: [ --fix, --exit-non-zero-on-fix ]
args: [ --fix, --exit-non-zero-on-fix ]
- repo: local
hooks:
- id: mypy
name: mypy
entry: poetry -C python/ run python -m mypy --no-namespace-packages --config-file=python/mypy.ini
language: system
types: [python]
pass_filenames: true
- repo: local
hooks:
- id: tests
name: tests
entry: poetry -C python/ run coverage run -m pytest python/tests/unit
language: system
types: [python]
pass_filenames: true
Loading

0 comments on commit 5198d23

Please sign in to comment.