Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

StringMarshaller optimizations #69035

Merged
merged 4 commits into from
May 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ namespace System.Runtime.InteropServices.Marshalling
Features = CustomTypeMarshallerFeatures.UnmanagedResources | CustomTypeMarshallerFeatures.TwoStageMarshalling | CustomTypeMarshallerFeatures.CallerAllocatedBuffer)]
public unsafe ref struct AnsiStringMarshaller
{
private byte* _allocated;
private readonly Span<byte> _span;
private byte* _nativeValue;
private bool _allocated;

/// <summary>
/// Initializes a new instance of the <see cref="AnsiStringMarshaller"/>.
Expand All @@ -36,25 +36,29 @@ public AnsiStringMarshaller(string? str)
/// </remarks>
public AnsiStringMarshaller(string? str, Span<byte> buffer)
{
_allocated = null;
_allocated = false;

if (str is null)
{
_span = default;
_nativeValue = null;
return;
}

// + 1 for null terminator
int maxByteCount = (str.Length + 1) * Marshal.SystemMaxDBCSCharSize + 1;
if (buffer.Length >= maxByteCount)
{
Marshal.StringToAnsiString(str, (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer)), buffer.Length);
_span = buffer;
}
else
// >= for null terminator
if ((long)Marshal.SystemMaxDBCSCharSize * str.Length >= buffer.Length)
{
_allocated = (byte*)Marshal.StringToCoTaskMemAnsi(str);
_span = default;
// Calculate accurate byte count when the provided stack-allocated buffer is not sufficient
int exactByteCount = Marshal.GetAnsiStringByteCount(str); // Includes null terminator
if (exactByteCount > buffer.Length)
{
buffer = new Span<byte>((byte*)Marshal.AllocCoTaskMem(exactByteCount), exactByteCount);
_allocated = true;
}
}

_nativeValue = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));

Marshal.GetAnsiStringBytes(str, buffer); // Includes null terminator
}

/// <summary>
Expand All @@ -63,7 +67,7 @@ public AnsiStringMarshaller(string? str, Span<byte> buffer)
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public byte* ToNativeValue() => _allocated != null ? _allocated : (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(_span));
public byte* ToNativeValue() => _nativeValue;

/// <summary>
/// Sets the native value representing the string.
Expand All @@ -72,15 +76,19 @@ public AnsiStringMarshaller(string? str, Span<byte> buffer)
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public void FromNativeValue(byte* value) => _allocated = value;
public void FromNativeValue(byte* value)
{
_nativeValue = value;
_allocated = true;
}

/// <summary>
/// Returns the managed string.
/// </summary>
/// <remarks>
/// <seealso cref="CustomTypeMarshallerDirection.Out"/>
/// </remarks>
public string? ToManaged() => _allocated == null ? null : new string((sbyte*)_allocated);
public string? ToManaged() => Marshal.PtrToStringAnsi((IntPtr)_nativeValue);

/// <summary>
/// Frees native resources.
Expand All @@ -90,8 +98,8 @@ public AnsiStringMarshaller(string? str, Span<byte> buffer)
/// </remarks>
public void FreeNative()
{
if (_allocated != null)
Marshal.FreeCoTaskMem((IntPtr)_allocated);
if (_allocated)
Marshal.FreeCoTaskMem((IntPtr)_nativeValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,64 +9,33 @@ namespace System.Runtime.InteropServices.Marshalling
/// Marshaller for UTF-16 strings
/// </summary>
[CLSCompliant(false)]
[CustomTypeMarshaller(typeof(string), BufferSize = 0x100,
Features = CustomTypeMarshallerFeatures.UnmanagedResources | CustomTypeMarshallerFeatures.TwoStageMarshalling | CustomTypeMarshallerFeatures.CallerAllocatedBuffer)]
[CustomTypeMarshaller(typeof(string),
Features = CustomTypeMarshallerFeatures.UnmanagedResources | CustomTypeMarshallerFeatures.TwoStageMarshalling)]
public unsafe ref struct Utf16StringMarshaller
{
private ushort* _allocated;
private readonly Span<ushort> _span;
private ushort* _nativeValue;

/// <summary>
/// Initializes a new instance of the <see cref="Utf16StringMarshaller"/>.
/// </summary>
/// <param name="str">The string to marshal.</param>
public Utf16StringMarshaller(string? str)
: this(str, default)
{
}

/// <summary>
/// Initializes a new instance of the <see cref="Utf16StringMarshaller"/>.
/// </summary>
/// <param name="str">The string to marshal.</param>
/// <param name="buffer">Buffer that may be used for marshalling.</param>
/// <remarks>
/// The <paramref name="buffer"/> must not be movable - that is, it should not be
/// on the managed heap or it should be pinned.
/// <seealso cref="CustomTypeMarshallerFeatures.CallerAllocatedBuffer"/>
/// The caller allocated constructor option is not provided because
/// pinning should be preferred for UTF-16 scenarios.
/// </remarks>
public Utf16StringMarshaller(string? str, Span<ushort> buffer)
/// <param name="str">The string to marshal.</param>
public Utf16StringMarshaller(string? str)
{
_allocated = null;
if (str is null)
{
_span = default;
_nativeValue = null;
return;
}

// + 1 for null terminator
if (buffer.Length >= str.Length + 1)
{
_span = buffer;
str.CopyTo(MemoryMarshal.Cast<ushort, char>(buffer));
_span[str.Length] = '\0'; // null-terminate
}
else
{
_allocated = (ushort*)Marshal.StringToCoTaskMemUni(str);
_span = default;
}
}

/// <summary>
/// Returns a reference to the marshalled string.
/// </summary>
public ref ushort GetPinnableReference()
{
if (_allocated != null)
return ref Unsafe.AsRef<ushort>(_allocated);
_nativeValue = (ushort*)Marshal.AllocCoTaskMem((str.Length + 1) * sizeof(ushort));

return ref _span.GetPinnableReference();
str.CopyTo(new Span<char>(_nativeValue, str.Length));
_nativeValue[str.Length] = '\0'; // null-terminate
}

/// <summary>
Expand All @@ -75,7 +44,7 @@ public ref ushort GetPinnableReference()
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public ushort* ToNativeValue() => _allocated != null ? _allocated : (ushort*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(_span));
public ushort* ToNativeValue() => _nativeValue;

/// <summary>
/// Sets the native value representing the string.
Expand All @@ -84,15 +53,15 @@ public ref ushort GetPinnableReference()
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public void FromNativeValue(ushort* value) => _allocated = value;
public void FromNativeValue(ushort* value) => _nativeValue = value;

/// <summary>
/// Returns the managed string.
/// </summary>
/// <remarks>
/// <seealso cref="CustomTypeMarshallerDirection.Out"/>
/// </remarks>
public string? ToManaged() => _allocated == null ? null : new string((char*)_allocated);
public string? ToManaged() => Marshal.PtrToStringUni((IntPtr)_nativeValue);

/// <summary>
/// Frees native resources.
Expand All @@ -102,8 +71,7 @@ public ref ushort GetPinnableReference()
/// </remarks>
public void FreeNative()
{
if (_allocated != null)
Marshal.FreeCoTaskMem((IntPtr)_allocated);
Marshal.FreeCoTaskMem((IntPtr)_nativeValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ namespace System.Runtime.InteropServices.Marshalling
Features = CustomTypeMarshallerFeatures.UnmanagedResources | CustomTypeMarshallerFeatures.TwoStageMarshalling | CustomTypeMarshallerFeatures.CallerAllocatedBuffer)]
public unsafe ref struct Utf8StringMarshaller
{
private byte* _allocated;
private readonly Span<byte> _span;
private byte* _nativeValue;
private bool _allocated;

/// <summary>
/// Initializes a new instance of the <see cref="Utf8StringMarshaller"/>.
Expand All @@ -37,32 +37,32 @@ public Utf8StringMarshaller(string? str)
/// </remarks>
public Utf8StringMarshaller(string? str, Span<byte> buffer)
{
_allocated = null;
_allocated = false;

if (str is null)
{
_span = default;
_nativeValue = null;
return;
}

// + 1 for null terminator
int maxByteCount = Encoding.UTF8.GetMaxByteCount(str.Length) + 1;
if (buffer.Length >= maxByteCount)
{
int byteCount = Encoding.UTF8.GetBytes(str, buffer);
buffer[byteCount] = 0; // null-terminate
_span = buffer;
}
else
const int MaxUtf8BytesPerChar = 3;

// >= for null terminator
if ((long)MaxUtf8BytesPerChar * str.Length >= buffer.Length)
{
_allocated = (byte*)Marshal.AllocCoTaskMem(maxByteCount);
int byteCount;
fixed (char* ptr = str)
// Calculate accurate byte count when the provided stack-allocated buffer is not sufficient
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The built-in marshallers use exact-sized buffers in some cases and conservatively-sized buffers in others. For example, see:

// Otherwise we use a slower "2-pass" mode where we first marshal the string into an intermediate buffer
// (managed byte array) and then allocate exactly the right amount of unmanaged memory. This is to avoid
// wasting memory on systems with multibyte character sets where the buffer we end up with is often much
.

I think we should just use exact-sized buffers everywhere. Long strings are rare, and when they do show up, the 3x over-allocation for them feels like a lot.

Copy link
Member Author

@jkotas jkotas May 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am wondering whether it would make sense to fix the Marshal methods to do exact-sized allocations as well.

int exactByteCount = checked(Encoding.UTF8.GetByteCount(str) + 1); // + 1 for null terminator
if (exactByteCount > buffer.Length)
{
byteCount = Encoding.UTF8.GetBytes(ptr, str.Length, _allocated, maxByteCount);
buffer = new Span<byte>((byte*)Marshal.AllocCoTaskMem(exactByteCount), exactByteCount);
_allocated = true;
}
_allocated[byteCount] = 0; // null-terminate
_span = default;
}

_nativeValue = (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(buffer));

int byteCount = Encoding.UTF8.GetBytes(str, buffer);
buffer[byteCount] = 0; // null-terminate
}

/// <summary>
Expand All @@ -71,7 +71,7 @@ public Utf8StringMarshaller(string? str, Span<byte> buffer)
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public byte* ToNativeValue() => _allocated != null ? _allocated : (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(_span));
public byte* ToNativeValue() => _nativeValue;

/// <summary>
/// Sets the native value representing the string.
Expand All @@ -80,15 +80,19 @@ public Utf8StringMarshaller(string? str, Span<byte> buffer)
/// <remarks>
/// <seealso cref="CustomTypeMarshallerFeatures.TwoStageMarshalling"/>
/// </remarks>
public void FromNativeValue(byte* value) => _allocated = value;
public void FromNativeValue(byte* value)
{
_nativeValue = value;
_allocated = true;
}

/// <summary>
/// Returns the managed string.
/// </summary>
/// <remarks>
/// <seealso cref="CustomTypeMarshallerDirection.Out"/>
/// </remarks>
public string? ToManaged() => _allocated == null ? null : Marshal.PtrToStringUTF8((IntPtr)_allocated);
public string? ToManaged() => Marshal.PtrToStringUTF8((IntPtr)_nativeValue);

/// <summary>
/// Frees native resources.
Expand All @@ -98,8 +102,8 @@ public Utf8StringMarshaller(string? str, Span<byte> buffer)
/// </remarks>
public void FreeNative()
{
if (_allocated != null)
Marshal.FreeCoTaskMem((IntPtr)_allocated);
if (_allocated)
Marshal.FreeCoTaskMem((IntPtr)_nativeValue);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2187,15 +2187,12 @@ public void FromNativeValue(byte* value) { }
public void FreeNative() { }
}
[System.CLSCompliant(false)]
[System.Runtime.InteropServices.Marshalling.CustomTypeMarshallerAttribute(typeof(string), BufferSize = 0x100,
[System.Runtime.InteropServices.Marshalling.CustomTypeMarshallerAttribute(typeof(string),
Features = System.Runtime.InteropServices.Marshalling.CustomTypeMarshallerFeatures.UnmanagedResources
| System.Runtime.InteropServices.Marshalling.CustomTypeMarshallerFeatures.CallerAllocatedBuffer
| System.Runtime.InteropServices.Marshalling.CustomTypeMarshallerFeatures.TwoStageMarshalling )]
public unsafe ref struct Utf16StringMarshaller
{
public Utf16StringMarshaller(string? str) { }
public Utf16StringMarshaller(string? str, System.Span<ushort> buffer) { }
public ref ushort GetPinnableReference() { throw null; }
public ushort* ToNativeValue() { throw null; }
public void FromNativeValue(ushort* value) { }
public string? ToManaged() { throw null; }
Expand Down