Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ namespace Microsoft.Extensions.AI;
[JsonDerivedType(typeof(FunctionCallContent), typeDiscriminator: "functionCall")]
[JsonDerivedType(typeof(FunctionResultContent), typeDiscriminator: "functionResult")]
[JsonDerivedType(typeof(TextContent), typeDiscriminator: "text")]
[JsonDerivedType(typeof(UriContent), typeDiscriminator: "uri")]
[JsonDerivedType(typeof(UsageContent), typeDiscriminator: "usage")]
public class AIContent
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,17 @@
using Microsoft.Shared.Diagnostics;

#pragma warning disable S3996 // URI properties should not be strings
#pragma warning disable CA1054 // URI-like parameters should not be strings
#pragma warning disable CA1056 // URI-like properties should not be strings

namespace Microsoft.Extensions.AI;

/// <summary>
/// Represents data content, such as an image or audio.
/// Represents binary content with an associated media type (also known as MIME type).
/// </summary>
/// <remarks>
/// <para>
/// The represented content may either be the actual bytes stored in this instance, or it may
/// be a URI that references the location of the content.
/// The content represents in-memory data. For references to data at a remote URI, use <see cref="UriContent"/> instead.
/// </para>
/// <para>
/// <see cref="Uri"/> always returns a valid URI string, even if the instance was constructed from
Expand All @@ -32,20 +32,27 @@ public class DataContent : AIContent
// Ideally DataContent would be defined in terms of Uri. However, Uri has a length limitation that makes it prohibitive
// for the kinds of data URIs that need to be supported here. As such, this type is based on strings.

/// <summary>Parsed data URI information.</summary>
private readonly DataUriParser.DataUri? _dataUri;

/// <summary>The string-based representation of the URI, including any data in the instance.</summary>
private string? _uri;

/// <summary>The data, lazily initialized if the data is provided in a data URI.</summary>
private ReadOnlyMemory<byte>? _data;

/// <summary>Parsed data URI information.</summary>
private DataUriParser.DataUri? _dataUri;

/// <summary>
/// Initializes a new instance of the <see cref="DataContent"/> class.
/// </summary>
/// <param name="uri">The URI of the content. This can be a data URI.</param>
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
/// <param name="uri">The data URI containing the content.</param>
/// <param name="mediaType">
/// The media type (also known as MIME type) represented by the content. If not provided,
/// it must be provided as part of the <paramref name="uri"/>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentException"><paramref name="uri"/> is not a data URI.</exception>
/// <exception cref="ArgumentException"><paramref name="uri"/> did not contain a media type and <paramref name="mediaType"/> was not supplied.</exception>
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is an invalid media type.</exception>
public DataContent(Uri uri, string? mediaType = null)
: this(Throw.IfNull(uri).ToString(), mediaType)
{
Expand All @@ -54,75 +61,78 @@ public DataContent(Uri uri, string? mediaType = null)
/// <summary>
/// Initializes a new instance of the <see cref="DataContent"/> class.
/// </summary>
/// <param name="uri">The URI of the content. This can be a data URI.</param>
/// <param name="uri">The data URI containing the content.</param>
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
/// <exception cref="ArgumentNullException"><paramref name="uri"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentException"><paramref name="uri"/> is not a data URI.</exception>
/// <exception cref="ArgumentException"><paramref name="uri"/> did not contain a media type and <paramref name="mediaType"/> was not supplied.</exception>
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is an invalid media type.</exception>
[JsonConstructor]
public DataContent([StringSyntax(StringSyntaxAttribute.Uri)] string uri, string? mediaType = null)
{
_uri = Throw.IfNullOrWhitespace(uri);

ValidateMediaType(ref mediaType);
MediaType = mediaType;

if (uri.StartsWith(DataUriParser.Scheme, StringComparison.OrdinalIgnoreCase))
if (!uri.StartsWith(DataUriParser.Scheme, StringComparison.OrdinalIgnoreCase))
{
_dataUri = DataUriParser.Parse(uri.AsMemory());
Throw.ArgumentException(nameof(uri), "The provided URI is not a data URI.");
}

// If the data URI contains a media type that's different from a non-null media type
// explicitly provided, prefer the one explicitly provided as an override.
if (MediaType is not null)
{
if (MediaType != _dataUri.MediaType)
{
// Extract the bytes from the data URI and null out the uri.
// Then we'll lazily recreate it later if needed based on the updated media type.
_data = _dataUri.ToByteArray();
_dataUri = null;
_uri = null;
}
}
else
_dataUri = DataUriParser.Parse(uri.AsMemory());

if (mediaType is null)
{
mediaType = _dataUri.MediaType;
if (mediaType is null)
{
MediaType = _dataUri.MediaType;
Throw.ArgumentNullException(nameof(mediaType), $"{nameof(uri)} did not contain a media type, and {nameof(mediaType)} was not provided.");
}
}
else if (!System.Uri.TryCreate(uri, UriKind.Absolute, out _))
else
{
throw new UriFormatException("The URI is not well-formed.");
if (mediaType != _dataUri.MediaType)
{
// If the data URI contains a media type that's different from a non-null media type
// explicitly provided, prefer the one explicitly provided as an override.

// Extract the bytes from the data URI and null out the uri.
// Then we'll lazily recreate it later if needed based on the updated media type.
_data = _dataUri.ToByteArray();
_dataUri = null;
_uri = null;
}
}

MediaType = DataUriParser.ThrowIfInvalidMediaType(mediaType);
}

/// <summary>
/// Initializes a new instance of the <see cref="DataContent"/> class.
/// </summary>
/// <param name="data">The byte contents.</param>
/// <param name="mediaType">The media type (also known as MIME type) represented by the content.</param>
public DataContent(ReadOnlyMemory<byte> data, string? mediaType = null)
/// <exception cref="ArgumentNullException"><paramref name="mediaType"/> is <see langword="null"/>.</exception>
/// <exception cref="ArgumentException"><paramref name="mediaType"/> is empty or composed entirely of whitespace.</exception>
public DataContent(ReadOnlyMemory<byte> data, string mediaType)
{
ValidateMediaType(ref mediaType);
MediaType = mediaType;
MediaType = DataUriParser.ThrowIfInvalidMediaType(mediaType);

_data = data;
}

/// <summary>
/// Determines whether the <see cref="MediaType"/> has the specified prefix.
/// Determines whether the <see cref="MediaType"/>'s top-level type matches the specified <paramref name="topLevelType"/>.
/// </summary>
/// <param name="prefix">The media type prefix.</param>
/// <returns><see langword="true"/> if the <see cref="MediaType"/> has the specified prefix, otherwise <see langword="false"/>.</returns>
public bool MediaTypeStartsWith(string prefix)
=> MediaType?.StartsWith(prefix, StringComparison.OrdinalIgnoreCase) is true;

/// <summary>Sets <paramref name="mediaType"/> to null if it's empty or composed entirely of whitespace.</summary>
private static void ValidateMediaType(ref string? mediaType)
{
if (!DataUriParser.IsValidMediaType(mediaType.AsSpan(), ref mediaType))
{
Throw.ArgumentException(nameof(mediaType), "Invalid media type.");
}
}
/// <param name="topLevelType">The top-level type to compare against the type portion of <see cref="MediaType"/>.</param>
/// <returns>
/// <see langword="true"/> if the type portion of <see cref="MediaType"/> matches <paramref name="topLevelType"/>;
/// otherwise, <see langword="false"/>.
/// </returns>
/// <remarks>
/// A media type consists mainly of a "type" and a "subtype" separated by a slash ("/").
/// The type portion is also called the "top-level type"; for example, the top-level type of
/// "image/png" is "image". <see cref="HasTopLevelMediaType"/> checks the supplied
/// <paramref name="topLevelType"/> against that type portion of <see cref="MediaType"/>.
/// </remarks>
public bool HasTopLevelMediaType(string topLevelType)
{
    // The comparison itself lives in DataUriParser; this member only supplies the instance's media type.
    return DataUriParser.HasTopLevelMediaType(MediaType, topLevelType);
}

/// <summary>Gets the URI for this <see cref="DataContent"/>.</summary>
/// <summary>Gets the data URI for this <see cref="DataContent"/>.</summary>
/// <remarks>
/// The returned URI is always a valid URI string, even if the instance was constructed from a <see cref="ReadOnlyMemory{Byte}"/>
/// or from a <see cref="System.Uri"/>. In the case of a <see cref="ReadOnlyMemory{T}"/>, this property returns a data URI containing
Expand All @@ -137,8 +147,8 @@ public string Uri
{
if (_dataUri is null)
{
Debug.Assert(Data is not null, "Expected Data to be initialized.");
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(Data.GetValueOrDefault()
Debug.Assert(_data is not null, "Expected _data to be initialized.");
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(_data.GetValueOrDefault()
Copy link
Contributor

@rogerbarreto rogerbarreto Mar 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once the _dataUri is generated from the _data, since it's readonly, wouldn't it be nice to cache it, and vice versa?

Suggested change
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(_data.GetValueOrDefault()
_uri = string.Concat("data:", MediaType, ";base64,", Convert.ToBase64String(_data.GetValueOrDefault()

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where would _dataUri end up being used again?

#if NET
.Span));
#else
Expand Down Expand Up @@ -167,10 +177,9 @@ public string Uri
/// If the media type was explicitly specified, this property returns that value.
/// If the media type was not explicitly specified, but a data URI was supplied and that data URI contained a non-default
/// media type, that media type is returned.
/// Otherwise, this property returns null.
/// </remarks>
[JsonPropertyOrder(1)]
public string? MediaType { get; private set; }
[JsonIgnore]
public string MediaType { get; }

/// <summary>Gets the data represented by this instance.</summary>
/// <remarks>
Expand All @@ -181,16 +190,18 @@ public string Uri
/// no attempt is made to retrieve the data from that URI.
/// </remarks>
[JsonIgnore]
public ReadOnlyMemory<byte>? Data
public ReadOnlyMemory<byte> Data
{
get
{
if (_dataUri is not null)
if (_data is null)
{
_data ??= _dataUri.ToByteArray();
Debug.Assert(_dataUri is not null, "Expected dataUri to be initialized.");
_data = _dataUri!.ToByteArray();
}

return _data;
Debug.Assert(_data is not null, "Expected data to be initialized.");
return _data.GetValueOrDefault();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@
#if NET8_0_OR_GREATER
using System.Buffers.Text;
#endif
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Net;
using System.Net.Http.Headers;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.Shared.Diagnostics;

#pragma warning disable CA1307 // Specify StringComparison for clarity

namespace Microsoft.Extensions.AI;

Expand Down Expand Up @@ -55,29 +59,35 @@ public static DataUri Parse(ReadOnlyMemory<char> dataUri)
}

// Validate the media type, if present.
ReadOnlySpan<char> span = metadata.Span.Trim();
string? mediaType = null;
if (!IsValidMediaType(metadata.Span.Trim(), ref mediaType))
if (!span.IsEmpty && !IsValidMediaType(span, ref mediaType))
{
throw new UriFormatException("Invalid data URI format: the media type is not a valid.");
}

return new DataUri(data, isBase64, mediaType);
}

/// <summary>Validates that a media type is valid, and if successful, ensures we have it as a string.</summary>
public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string? mediaType)
public static string ThrowIfInvalidMediaType(
string mediaType, [CallerArgumentExpression(nameof(mediaType))] string parameterName = "")
{
Debug.Assert(
mediaType is null || mediaTypeSpan.Equals(mediaType.AsSpan(), StringComparison.Ordinal),
"mediaType string should either be null or the same as the span");
_ = Throw.IfNullOrWhitespace(mediaType, parameterName);

// If the media type is empty or all whitespace, normalize it to null.
if (mediaTypeSpan.IsWhiteSpace())
if (!IsValidMediaType(mediaType))
{
mediaType = null;
return true;
Throw.ArgumentException(parameterName, $"An invalid media type was specified: '{mediaType}'");
}

return mediaType;
}

/// <summary>Validates a media type supplied as a string by forwarding to the span-based overload.</summary>
/// <param name="mediaType">The media type to validate.</param>
/// <returns>The result of the span-based <c>IsValidMediaType</c> overload for <paramref name="mediaType"/>.</returns>
public static bool IsValidMediaType(string mediaType)
{
    // The span-based overload takes the string by ref; the parameter itself serves as that storage,
    // so any reassignment it performs stays local to this call.
    return IsValidMediaType(mediaType.AsSpan(), ref mediaType);
}

/// <summary>Validates that a media type is valid, and if successful, ensures we have it as a string.</summary>
public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, [NotNull] ref string? mediaType)
{
// For common media types, we can avoid both allocating a string for the span and avoid parsing overheads.
string? knownType = mediaTypeSpan switch
{
Expand Down Expand Up @@ -108,7 +118,7 @@ public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string
};
if (knownType is not null)
{
mediaType ??= knownType;
mediaType = knownType;
return true;
}

Expand All @@ -117,6 +127,16 @@ public static bool IsValidMediaType(ReadOnlySpan<char> mediaTypeSpan, ref string
return MediaTypeHeaderValue.TryParse(mediaType, out _);
}

/// <summary>
/// Determines whether the type portion of <paramref name="mediaType"/> equals <paramref name="topLevelMediaType"/>.
/// </summary>
/// <param name="mediaType">The full media type, for example "image/png".</param>
/// <param name="topLevelMediaType">The top-level type to compare against, for example "image".</param>
/// <returns>
/// <see langword="true"/> if the text before the first '/' in <paramref name="mediaType"/> (or the entire string
/// when it contains no '/'), with surrounding whitespace trimmed, equals <paramref name="topLevelMediaType"/>
/// under an ordinal, case-insensitive comparison; otherwise, <see langword="false"/>.
/// </returns>
public static bool HasTopLevelMediaType(string mediaType, string topLevelMediaType)
{
    int separator = mediaType.IndexOf('/');

    // Isolate the text preceding the first slash; with no slash the whole value is the type portion.
    ReadOnlySpan<char> typePortion;
    if (separator >= 0)
    {
        typePortion = mediaType.AsSpan(0, separator);
    }
    else
    {
        typePortion = mediaType.AsSpan();
    }

    // Only the media type side is trimmed; the requested top-level type is compared as given.
    ReadOnlySpan<char> expected = topLevelMediaType.AsSpan();
    return typePortion.Trim().Equals(expected, StringComparison.OrdinalIgnoreCase);
}

/// <summary>Test whether the value is a base64 string without whitespace.</summary>
private static bool IsValidBase64Data(ReadOnlySpan<char> value)
{
Expand Down
Loading
Loading