-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
.Net: Add support for ImageContent to use data URIs in ChatPromptParser so templates can use base64 encoded images. (#8401)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> At present, including images in prompt templates using base64 data encoding is not possible. This limitation is due to `ChatPromptParser.cs` exclusively calling the `ImageContent` constructor that requires a URI, which leads to an `InvalidOperationException`. The required change is straightforward, and the limitation has been discussed before, [for example here](#7121). Closes #7150. ### Description The proposed fix is a simple check of whether the content starts with `data:`; if it does, the `ImageContent` constructor that accepts a `dataUri` is used instead. ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄 --------- Co-authored-by: Marcelo Garcia 🛸 <marcgarc@microsoft.com>
- Loading branch information
1 parent
3bfee7b
commit 78289af
Showing
4 changed files
with
110 additions
and
2 deletions.
There are no files selected for viewing
51 changes: 51 additions & 0 deletions
51
dotnet/samples/Concepts/PromptTemplates/HandlebarsVisionPrompts.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
// Copyright (c) Microsoft. All rights reserved. | ||
|
||
using Microsoft.SemanticKernel; | ||
using Microsoft.SemanticKernel.PromptTemplates.Handlebars; | ||
|
||
namespace PromptTemplates; | ||
|
||
/// <summary>
/// Example of a chat completion Handlebars prompt template that takes a base64 encoded image
/// (passed as a data URI) as one of its parameters.
/// </summary>
public class HandlebarsVisionPrompts(ITestOutputHelper output) : BaseTest(output)
{
    /// <summary>
    /// Creates a prompt function from the Handlebars template, invokes it with a text request and an
    /// inline PNG data URI, and writes the model response to the console.
    /// </summary>
    [Fact]
    public async Task RunAsync()
    {
        // {{request}} and {{imageData}} are filled from the kernel arguments supplied below.
        const string VisionPromptTemplate = """
            <message role="system">You are an AI assistant designed to help with image recognition tasks.</message>
            <message role="user">
            <text>{{request}}</text>
            <image>{{imageData}}</image>
            </message>
            """;

        // Chat completion service settings are read from the test configuration.
        Kernel kernel = Kernel
            .CreateBuilder()
            .AddOpenAIChatCompletion(modelId: TestConfiguration.OpenAI.ChatModelId, apiKey: TestConfiguration.OpenAI.ApiKey)
            .Build();

        PromptTemplateConfig visionPromptConfig = new()
        {
            Name = "Vision_Chat_Prompt",
            TemplateFormat = "handlebars",
            Template = VisionPromptTemplate,
        };

        KernelFunction visionFunction = kernel.CreateFunctionFromPrompt(
            visionPromptConfig,
            new HandlebarsPromptTemplateFactory());

        // The image is passed inline as a "data:" URI holding a small base64 encoded PNG.
        var promptArguments = new KernelArguments
        {
            ["request"] = "Describe this image:",
            ["imageData"] = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAAXNSR0IArs4c6QAAACVJREFUKFNj/KTO/J+BCMA4iBUyQX1A0I10VAizCj1oMdyISyEAFoQbHwTcuS8AAAAASUVORK5CYII=",
        };

        FunctionResult result = await kernel.InvokeAsync(visionFunction, promptArguments);
        Console.WriteLine(result);

        /*
        Sample output:
        The image is a solid block of bright red color. There are no additional features, shapes, or textures present.
        */
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters