Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 131 additions & 0 deletions src/GenerativeAI.Microsoft/GenerativeAIImageGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#pragma warning disable MEAI001
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using GenerativeAI;
using GenerativeAI.Core;
using GenerativeAI.Types;
using GenerativeAI.Microsoft.Extensions;
using Microsoft.Extensions.AI;

namespace GenerativeAI.Microsoft;

/// <summary>
/// Implements Microsoft.Extensions.AI.IImageGenerator using the Google_GenerativeAI SDK by
/// creating a GenerateContentRequest that requests image modality and forwarding it to
/// <see cref="GenerativeModel.GenerateContentAsync(GenerateContentRequest, CancellationToken)"/>.
/// </summary>
public sealed class GenerativeAIImageGenerator : IImageGenerator
{
    /// <summary>
    /// Underlying <see cref="GenerativeModel"/> instance that performs the generation calls.
    /// </summary>
    public GenerativeModel model { get; }

    /// <summary>
    /// Creates a new instance using an API key and optional model name.
    /// </summary>
    /// <param name="apiKey">Google AI API key used to authenticate requests.</param>
    /// <param name="modelName">Model identifier; defaults to the Gemini 2 Flash image-generation preview model.</param>
    public GenerativeAIImageGenerator(string apiKey, string modelName = GoogleAIModels.Gemini2FlashPreviewImageGeneration)
    {
        model = new GenerativeModel(apiKey, modelName);
    }

    /// <summary>
    /// Creates a new instance using a platform adapter and optional model name.
    /// </summary>
    /// <param name="adapter">Platform adapter supplying credential/endpoint configuration.</param>
    /// <param name="modelName">Model identifier; defaults to the Gemini 2 Flash image-generation preview model.</param>
    public GenerativeAIImageGenerator(IPlatformAdapter adapter, string modelName = GoogleAIModels.Gemini2FlashPreviewImageGeneration)
    {
        model = new GenerativeModel(adapter, modelName);
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // No resources owned by this wrapper require disposal.
    }

    /// <inheritdoc/>
    public object? GetService(Type serviceType, object? serviceKey = null)
    {
        // Serve only unkeyed requests for this instance (or a base type/interface of it).
        if (serviceKey == null && serviceType?.IsInstanceOfType(this) == true)
            return this;
        return null;
    }

    /// <inheritdoc/>
    public async Task<ImageGenerationResponse> GenerateAsync(ImageGenerationRequest request,
        ImageGenerationOptions? options = null, CancellationToken cancellationToken = default)
    {
#if NET6_0_OR_GREATER
        ArgumentNullException.ThrowIfNull(request);
#else
        if (request == null) throw new ArgumentNullException(nameof(request));
#endif

        var genRequest = ToGenerateContentRequest(request, options);
        var resp = await model.GenerateContentAsync(genRequest, cancellationToken).ConfigureAwait(false);
        return ToImageGenerationResponse(resp);
    }

    // Convert the Microsoft request/options into a model-specific GenerateContentRequest.
    private GenerateContentRequest ToGenerateContentRequest(ImageGenerationRequest request, ImageGenerationOptions? options)
    {
        List<Part> parts = [];

        // Add prompt text (if any).
        if (!string.IsNullOrEmpty(request.Prompt))
        {
            parts.Add(new(request.Prompt!));
        }

        // If original images are provided (image-edit scenario), add them as parts.
        if (request.OriginalImages != null)
        {
            foreach (var aiContent in request.OriginalImages)
            {
                // ToPart() may return null for AIContent kinds it cannot map;
                // skip those rather than adding a null part to the request.
                if (aiContent.ToPart() is { } part)
                    parts.Add(part);
            }
        }

        // Start from the caller-supplied raw config when available (lets callers set
        // model-specific knobs), then overlay the options this adapter understands.
        GenerationConfig generationConfig = options?.RawRepresentationFactory?.Invoke(this) as GenerationConfig ?? new();
        generationConfig.CandidateCount = options?.Count ?? 1;

        // We must request both text and image modalities to get images back.
        generationConfig.ResponseModalities = new List<Modality> { Modality.TEXT, Modality.IMAGE };

        if (options != null)
        {
            if (!string.IsNullOrEmpty(options.MediaType))
                generationConfig.ResponseMimeType = options.MediaType;

            // Map the requested image size onto the closest media-resolution bucket.
            // Basic heuristic: this config knob takes a coarse resolution tier,
            // not exact pixel dimensions.
            if (options.ImageSize.HasValue)
            {
                var sz = options.ImageSize.Value;
                if (sz.Width >= 1024 || sz.Height >= 1024)
                    generationConfig.MediaResolution = MediaResolution.MEDIA_RESOLUTION_HIGH;
                else if (sz.Width >= 512 || sz.Height >= 512)
                    generationConfig.MediaResolution = MediaResolution.MEDIA_RESOLUTION_MEDIUM;
                else
                    generationConfig.MediaResolution = MediaResolution.MEDIA_RESOLUTION_LOW;
            }
        }

        return new GenerateContentRequest()
        {
            GenerationConfig = generationConfig,
            Contents = [new() { Parts = parts }]
        };
    }

    // Convert the model response to ImageGenerationResponse, keeping the raw
    // response available for callers that need provider-specific details.
    private static ImageGenerationResponse ToImageGenerationResponse(GenerateContentResponse? resp)
    {
        var aiContents = resp?.Candidates?.FirstOrDefault()?.Content?.Parts.ToAiContents();
        return new ImageGenerationResponse(aiContents) { RawRepresentation = resp };
    }
}
#pragma warning restore MEAI001
163 changes: 163 additions & 0 deletions src/GenerativeAI.Microsoft/GenerativeAIImagenGenerator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#pragma warning disable MEAI001
using System;
using System.Collections.Generic;
using System.Drawing;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using GenerativeAI;
using GenerativeAI.Core;
using GenerativeAI.Types;
using GenerativeAI.Clients;
using GenerativeAI.Microsoft.Extensions;
using Microsoft.Extensions.AI;

namespace GenerativeAI.Microsoft;

/// <summary>
/// Implements Microsoft.Extensions.AI.IImageGenerator by creating an ImagenModel via GenAI.CreateImageModel
/// and calling <see cref="GenerativeAI.Clients.ImagenModel.GenerateImagesAsync(GenerateImageRequest, CancellationToken)"/>.
/// </summary>
public sealed class GenerativeAIImagenGenerator : IImageGenerator
{
    // Aspect ratios accepted by the Imagen API's "aspectRatio" parameter,
    // stored with their numeric value so a requested pixel size can be
    // snapped to the closest supported ratio.
    private static readonly (double Value, string Name)[] SupportedAspectRatios =
    {
        (1.0, "1:1"),
        (3.0 / 4.0, "3:4"),
        (4.0 / 3.0, "4:3"),
        (9.0 / 16.0, "9:16"),
        (16.0 / 9.0, "16:9"),
    };

    /// <summary>
    /// Underlying ImagenModel instance created from the provided GenAI factory.
    /// </summary>
    public ImagenModel model { get; }

    /// <summary>
    /// Creates a new instance using an API key and optional model name.
    /// </summary>
    /// <param name="apiKey">Google AI API key used to authenticate requests.</param>
    /// <param name="modelName">Imagen model identifier; defaults to Imagen 3 Generate 002.</param>
    public GenerativeAIImagenGenerator(string apiKey, string modelName = GoogleAIModels.Imagen.Imagen3Generate002):
        this(new GoogleAi(apiKey), modelName)
    { }


    /// <summary>
    /// Creates a new instance using an existing <see cref="GenAI"/> factory and optional model name.
    /// </summary>
    /// <param name="genai">Factory used to create the underlying image model.</param>
    /// <param name="modelName">Imagen model identifier; defaults to Imagen 3 Generate 002.</param>
    public GenerativeAIImagenGenerator(GenAI genai, string modelName = GoogleAIModels.Imagen.Imagen3Generate002)
    {
#if NET6_0_OR_GREATER
        ArgumentNullException.ThrowIfNull(genai);
#else
        if (genai == null) throw new ArgumentNullException(nameof(genai));
#endif
        model = genai.CreateImageModel(modelName);
    }

    /// <inheritdoc/>
    public void Dispose()
    {
        // No resources owned by this wrapper require disposal.
    }

    /// <inheritdoc/>
    public object? GetService(Type serviceType, object? serviceKey = null)
    {
        // Serve only unkeyed requests for this instance (or a base type/interface of it).
        if (serviceKey == null && serviceType?.IsInstanceOfType(this) == true)
            return this;
        return null;
    }

    /// <inheritdoc/>
    public async Task<ImageGenerationResponse> GenerateAsync(ImageGenerationRequest request,
        ImageGenerationOptions? options = null, CancellationToken cancellationToken = default)
    {
#if NET6_0_OR_GREATER
        ArgumentNullException.ThrowIfNull(request);
#else
        if (request == null) throw new ArgumentNullException(nameof(request));
#endif

        var imgRequest = ToGenerateImageRequest(request, options);
        var resp = await model.GenerateImagesAsync(imgRequest, cancellationToken).ConfigureAwait(false);
        return ToImageGenerationResponse(resp);
    }

    // Convert Microsoft ImageGenerationRequest + options to a GenerateImageRequest.
    private GenerateImageRequest ToGenerateImageRequest(ImageGenerationRequest request, ImageGenerationOptions? options)
    {
        var imgRequest = new GenerateImageRequest();
        var instances = new List<ImageGenerationInstance>();

        // One instance per original image (edit scenario), or a single
        // prompt-only instance for pure text-to-image generation.
        if (request.OriginalImages != null && request.OriginalImages.Any())
        {
            instances.AddRange(request.OriginalImages.Select(content => new ImageGenerationInstance
            {
                Prompt = request.Prompt,
                Image = ConvertAiContentToImageSource(content)
            }));
        }
        else
        {
            instances.Add(new ImageGenerationInstance { Prompt = request.Prompt });
        }

        // Start from the caller-supplied raw parameters when available (lets callers
        // set model-specific knobs), then overlay the options this adapter understands.
        ImageGenerationParameters parameters = options?.RawRepresentationFactory?.Invoke(this) as ImageGenerationParameters ?? new();
        parameters.SampleCount = options?.Count ?? 1;

        if (options != null)
        {
            if (!string.IsNullOrEmpty(options.MediaType))
            {
                parameters.OutputOptions = new OutputOptions { MimeType = options.MediaType };
            }

            if (options.ImageSize.HasValue)
            {
                // Imagen expects a ratio string such as "1:1" or "16:9", not raw
                // pixel dimensions (e.g. "1024:768" would not match the documented
                // values), so snap the requested size to the closest supported ratio.
                var sz = options.ImageSize.Value;
                parameters.AspectRatio = ToClosestSupportedAspectRatio(sz.Width, sz.Height);
            }
        }

        return new GenerateImageRequest
        {
            Instances = instances,
            Parameters = parameters
        };
    }

    // Snap a requested pixel size to the closest aspect ratio the Imagen API supports.
    // Non-positive dimensions fall back to the square default.
    private static string ToClosestSupportedAspectRatio(int width, int height)
    {
        if (width <= 0 || height <= 0)
            return "1:1";

        double target = (double)width / height;
        return SupportedAspectRatios.OrderBy(r => Math.Abs(r.Value - target)).First().Name;
    }

    // Convert model response to Microsoft ImageGenerationResponse, decoding each
    // base64-encoded prediction into a DataContent.
    private static ImageGenerationResponse ToImageGenerationResponse(GenerateImageResponse? resp)
    {
        var contents = new List<AIContent>();
        if (resp?.Predictions != null)
        {
            foreach (var pred in resp.Predictions)
            {
                if (!string.IsNullOrEmpty(pred.BytesBase64Encoded))
                {
                    var data = Convert.FromBase64String(pred.BytesBase64Encoded);
                    // Default to image/png when the service omits the MIME type.
                    contents.Add(new DataContent(data, pred.MimeType ?? "image/png"));
                }
            }
        }

        return new ImageGenerationResponse(contents) { RawRepresentation = resp };
    }

    // Map an AIContent to an Imagen ImageSource, or null when no mapping exists
    // (unsupported content kinds are silently dropped from the instance).
    private static ImageSource? ConvertAiContentToImageSource(AIContent content)
    {
        if (content == null) return null;

        if (content is DataContent dc)
        {
            return new ImageSource { BytesBase64Encoded = Convert.ToBase64String(dc.Data.ToArray()) };
        }

        if (content is UriContent uc)
        {
            var uriVal = uc.Uri?.ToString();

            // Only treat known GCS URIs as storage references for the Imagen API.
            // NOTE(review): an https://storage.googleapis.com/... URL is passed
            // through verbatim as GcsUri — confirm the API accepts that form, or
            // whether it should be rewritten to gs://bucket/object.
            if (uriVal?.StartsWith("gs://", StringComparison.OrdinalIgnoreCase) == true ||
                uriVal?.IndexOf("storage.googleapis.com", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return new ImageSource { GcsUri = uriVal };
            }
        }

        return null;
    }
}
#pragma warning restore MEAI001
60 changes: 60 additions & 0 deletions src/GenerativeAI.Microsoft/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,66 @@ public class MyChatService
}
```

### 4. Using IImageGenerator

The image generator can also be used from a service as above. Here's a sample that shows its capabilities.

```C#
using System.Diagnostics;
using GenerativeAI.Microsoft;
using Microsoft.Extensions.AI;

#pragma warning disable MEAI001
// Imagen creates high-quality initial images
IImageGenerator imageGenerator = new GenerativeAIImagenGenerator(
Environment.GetEnvironmentVariable("GOOGLE_API_KEY"),
"imagen-4.0-fast-generate-001");

var response = await imageGenerator.GenerateImagesAsync("A clown fish with orange and black-bordered white stripes.");
var img1 = GetImageContent(response);
SaveImage(img1, "i1.png");
ShowImage("i1.png");

response = await imageGenerator.GenerateImagesAsync("A blue tang fish, blue and black with yellow tipped fin and tail.");
var img2 = GetImageContent(response);
SaveImage(img2, "i2.png");
ShowImage("i2.png");

// Imagen cannot edit, but we can use the Gemini model for that.
IImageGenerator imageGeneratorEdit = new GenerativeAIImageGenerator(
Environment.GetEnvironmentVariable("GOOGLE_API_KEY"),
"gemini-2.5-flash-image-preview");
var request = new ImageGenerationRequest()
{
Prompt = "Combine the two images into a single scene.",
OriginalImages = new[] { img1, img2 }
};
response = await imageGeneratorEdit.GenerateAsync(request);
var scene = GetImageContent(response);
SaveImage(scene, "scene.png");
ShowImage("scene.png");

response = await imageGeneratorEdit.EditImageAsync(scene, "Change the setting to a fish tank.");
var edit = GetImageContent(response);
SaveImage(edit, "edit.png");
ShowImage("edit.png");

DataContent GetImageContent(ImageGenerationResponse response) =>
response.Contents.OfType<DataContent>().Single();

void SaveImage(DataContent content, string fileName) =>
File.WriteAllBytes(fileName, content.Data.Span);

void ShowImage(string fileName)
{
Process.Start(new ProcessStartInfo
{
FileName = fileName,
UseShellExecute = true
});
}
```

## Dependencies

- [Google_GenerativeAI](https://github.com/Google_GenerativeAI) (Unofficial C# Google Generative AI SDK)
Expand Down
Loading