Skip to content

Commit

Permalink
Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAsse…
Browse files Browse the repository at this point in the history
…mbly (#96462)

* Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAssembly

To aid in debugging RegexCompiler issues and to help vet the new AssemblyBuilder.Save support.

* Fix IL2121 warnings

---------

Co-authored-by: Sven Boemer <sbomer@gmail.com>
  • Loading branch information
stephentoub and sbomer authored Jan 9, 2024
1 parent 315a2a8 commit b4ec422
Show file tree
Hide file tree
Showing 8 changed files with 349 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ internal void AppendMetadata(MethodBodyStreamEncoder methodBodyEncoder)
}
}

[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2072:DynamicallyAccessedMembers", Justification = "Members are retrieved from internal cache")]
private void WriteInterfaceImplementations(TypeBuilderImpl typeBuilder, TypeDefinitionHandle typeHandle)
{
if (typeBuilder._interfaces != null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,14 @@ private void ValidateAllAbstractMethodsAreImplemented()
}

[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2065:DynamicallyAccessedMembers", Justification = "Methods are loaded from this TypeBuilder. The interface methods should be available at this point")]
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2075:DynamicallyAccessedMembers", Justification = "The interface methods should be available at this point")]
[UnconditionalSuppressMessage("ReflectionAnalysis", "IL2085:DynamicallyAccessedMembers", Justification = "Methods are loaded from this TypeBuilder")]
private void CheckInterfaces(Type[] _interfaces)
{
foreach (Type interfaceType in _interfaces)
{
#pragma warning disable IL2075 // Analyzer produces a different warning code than illink. The IL2065 suppression takes care of illink: https://github.com/dotnet/runtime/issues/96646
MethodInfo[] interfaceMethods = interfaceType.GetMethods(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance);
#pragma warning restore IL2075
for (int i = 0; i < interfaceMethods.Length; i++)
{
MethodInfo interfaceMethod = interfaceMethods[i];
Expand All @@ -195,7 +196,9 @@ private void CheckInterfaces(Type[] _interfaces)
}

// Check parent interfaces too
#pragma warning disable IL2075 // Analyzer produces a different warning code than illink. The IL2065 suppression takes care of illink: https://github.com/dotnet/runtime/issues/96646
CheckInterfaces(interfaceType.GetInterfaces());
#pragma warning restore IL2075
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.EnumerateMatches.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
<Compile Include="System\Text\RegularExpressions\RegexAssemblyCompiler.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseBehavior.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.Data.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,41 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly
CompileToAssembly(regexinfos, assemblyname, attributes, null);

[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile) =>
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile)
{
#if DEBUG
    // Debug-only path that exists solely to help with the development of the RegexCompiler:
    // every supplied regex is parsed and handed to a RegexAssemblyCompiler, which is then saved.
    // .NET no longer supports CompileToAssembly; the source generator should be used instead.
#pragma warning disable IL3050
    ArgumentNullException.ThrowIfNull(assemblyname);
    ArgumentNullException.ThrowIfNull(regexinfos);

    RegexAssemblyCompiler compiler = new RegexAssemblyCompiler(assemblyname, attributes, resourceFile);

    for (int i = 0; i < regexinfos.Length; i++)
    {
        // Validate using the indexed expression so the reported parameter name is unchanged.
        ArgumentNullException.ThrowIfNull(regexinfos[i]);
        RegexCompilationInfo info = regexinfos[i];

        string pattern = info.Pattern;

        // Ensure Compiled is set; it enables more optimization specific to compilation.
        RegexOptions options = info.Options | RegexOptions.Compiled;

        // Qualify the type name with the namespace only when one was supplied.
        string fullname;
        if (info.Namespace.Length == 0)
        {
            fullname = info.Name;
        }
        else
        {
            fullname = info.Namespace + "." + info.Name;
        }

        CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0
            ? CultureInfo.InvariantCulture
            : CultureInfo.CurrentCulture;

        RegexTree tree = RegexParser.Parse(pattern, options, culture);
        RegexInterpreterCode code = RegexWriter.Write(tree);

        compiler.GenerateRegexType(pattern, options, fullname, info.IsPublic, tree, code, info.MatchTimeout);
    }

    compiler.Save(assemblyname.Name ?? "RegexCompileToAssembly");
#pragma warning restore IL3050
#else
    throw new PlatformNotSupportedException(SR.PlatformNotSupported_CompileToAssembly);
#endif
}

/// <summary>
/// Escapes a minimal set of metacharacters (\, *, +, ?, |, {, [, (, ), ^, $, ., #, and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
using System.Reflection.Emit;
using System.Threading;

// WARNING:
// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later.
// It accesses internal and private members of System.Text.RegularExpressions, which may change
// at any point in the future, and relies on details about the current machine/process, e.g. is
// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists
// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits.

#if DEBUG
namespace System.Text.RegularExpressions
{
/// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary>
[RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")]
internal sealed class RegexAssemblyCompiler : RegexCompiler
{
/// <summary>Type count used to augment generated type names to create unique names.</summary>
private static int s_typeCount;

/// <summary>The assembly builder returned by the reflected AssemblyBuilderImpl.DefinePersistedAssembly call made in the constructor.</summary>
private readonly AssemblyBuilder _assembly;
/// <summary>The module defined on <see cref="_assembly"/> in the constructor; generated types are defined in it.</summary>
private readonly ModuleBuilder _module;
/// <summary>The non-public AssemblyBuilderImpl.Save(string) method, looked up via reflection in the constructor and invoked by <see cref="Save"/>.</summary>
private readonly MethodInfo _save;

/// <summary>
/// Creates the assembly builder (through the non-public AssemblyBuilderImpl.DefinePersistedAssembly)
/// and the single module into which regex types are subsequently generated.
/// </summary>
/// <param name="an">Name of the assembly to define; its Name plus ".dll" is used as the module name.</param>
/// <param name="attribs">Optional custom attributes forwarded to DefinePersistedAssembly.</param>
/// <param name="resourceFile">Must be null; any other value results in a <see cref="PlatformNotSupportedException"/>.</param>
internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
{
    // Unmanaged resources are not supported: _assembly.DefineUnmanagedResource(resourceFile);
    if (resourceFile is not null)
    {
        throw new PlatformNotSupportedException();
    }

    // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
    Type builderImplType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;

    // Locate the static factory that creates a persistable assembly builder.
    Type[] factorySignature = new Type[] { typeof(AssemblyName), typeof(Assembly), typeof(List<CustomAttributeBuilder>) };
    MethodInfo? definePersisted = builderImplType.GetMethod(
        "DefinePersistedAssembly",
        BindingFlags.NonPublic | BindingFlags.Static,
        factorySignature);
    if (definePersisted is null)
    {
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.DefinePersistedAssembly");
    }

    // The attribute array, when supplied, is copied into the List<> the factory expects.
    List<CustomAttributeBuilder>? attributeList = attribs is null ? null : new List<CustomAttributeBuilder>(attribs);
    object? created = definePersisted.Invoke(null, new object?[] { an, typeof(object).Assembly, attributeList });
    _assembly = (AssemblyBuilder?)created ??
        throw new InvalidOperationException("DefinePersistedAssembly returned null");

    // Resolve Save(string) now so that a missing method surfaces at construction time.
    MethodInfo? save = builderImplType.GetMethod("Save", BindingFlags.NonPublic | BindingFlags.Instance, new Type[] { typeof(string) });
    if (save is null)
    {
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.Save");
    }
    _save = save;

    _module = _assembly.DefineDynamicModule(an.Name + ".dll");
}

/// <summary>
/// Defines three types in the module for a single regex: a RegexRunner-derived type, a
/// RegexRunnerFactory-derived type that instantiates it, and a Regex-derived type named
/// <paramref name="name"/> whose constructor(s) wire everything together.
/// </summary>
internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
{
    // Store arguments into the base type's fields.
    _options = options;
    _regexTree = tree;

    // The shared counter yields a suffix that keeps the helper type names unique.
    uint typeNumber = (uint)Interlocked.Increment(ref s_typeCount);
    string suffix = typeNumber.ToString();

    // 1) The RegexRunner-derived type and its three overrides.
    TypeBuilder runnerBuilder = DefineType(_module, name + "Runner" + suffix, isPublic: false, isSealed: true, typeof(RegexRunner));

    _ilg = DefineMethod(runnerBuilder, "TryFindNextPossibleStartingPosition", new[] { typeof(ReadOnlySpan<char>) }, typeof(bool), out MethodBuilder findNextMethod);
    EmitTryFindNextPossibleStartingPosition();

    _ilg = DefineMethod(runnerBuilder, "TryMatchAtCurrentPosition", new[] { typeof(ReadOnlySpan<char>) }, typeof(bool), out MethodBuilder matchAtCurrentMethod);
    EmitTryMatchAtCurrentPosition();

    _ilg = DefineMethod(runnerBuilder, "Scan", new[] { typeof(ReadOnlySpan<char>) }, null, out _);
    EmitScan(options, findNextMethod, matchAtCurrentMethod);

    Type runnerType = runnerBuilder.CreateType()!;

    // 2) The RegexRunnerFactory-derived type, whose CreateInstance constructs the runner.
    TypeBuilder factoryBuilder = DefineType(_module, name + "Factory" + suffix, isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
    _ilg = DefineMethod(factoryBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
    GenerateCreateInstance(runnerType);
    Type factoryType = factoryBuilder.CreateType()!;

    // 3) The Regex-derived type itself, always with a parameterless constructor.
    TypeBuilder regexBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
    ConstructorBuilder parameterlessCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
    _ilg = parameterlessCtor.GetILGenerator();
    GenerateRegexDefaultCtor(pattern, options, factoryType, tree, code, matchTimeout);

    if (matchTimeout != Regex.InfiniteMatchTimeout)
    {
        // We only generate a constructor with a timeout parameter if the regex information supplied has a non-infinite timeout.
        // If it has an infinite timeout, then the generated code is not going to respect the timeout. This is a difference from netfx,
        // due to the fact that we now special-case an infinite timeout in the code generator to avoid spitting unnecessary code
        // and paying for the checks at run time.
        ConstructorBuilder timeoutCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, new[] { typeof(TimeSpan) });
        _ilg = timeoutCtor.GetILGenerator();
        GenerateRegexTimeoutCtor(parameterlessCtor, regexBuilder);
    }

    regexBuilder.CreateType();
}

/// <summary>Emits a trivial factory body: construct <paramref name="type"/> via its public parameterless constructor and return it.</summary>
private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
{
    // return new Type();
    ConstructorInfo parameterlessCtor = type.GetConstructor(Type.EmptyTypes)!;
    _ilg!.Emit(OpCodes.Newobj, parameterlessCtor);
    Ret();
}

/// <summary>
/// Emits, into the current IL generator, the body of the generated Regex-derived type's
/// parameterless constructor: it calls the Regex base constructor and then stores the pattern,
/// options, runner factory, match timeout and capture metadata into the base type's fields.
/// </summary>
/// <param name="pattern">The pattern string stored into the pattern field.</param>
/// <param name="options">The options, stored as an int constant into the roptions field.</param>
/// <param name="regexRunnerFactoryType">The factory type; its parameterless constructor is invoked and the result stored into the factory field.</param>
/// <param name="tree">Source of the capture count, capture mappings and capture names.</param>
/// <param name="code">Not referenced in this method's body.</param>
/// <param name="matchTimeout">The timeout to bake in; the infinite value is loaded from Regex.InfiniteMatchTimeout rather than from its ticks.</param>
private void GenerateRegexDefaultCtor(
string pattern,
RegexOptions options,
[DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
RegexTree tree,
RegexInterpreterCode code,
TimeSpan matchTimeout)
{
// Call the base ctor and store pattern, options, and factory.
// base.ctor();
// base.pattern = pattern;
// base.roptions = options;
// base.factory = new DerivedRegexRunnerFactory();
Ldthis();
_ilg!.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, Type.EmptyTypes, Array.Empty<ParameterModifier>())!);
Ldthis();
Ldstr(pattern);
Stfld(RegexField(nameof(Regex.pattern)));
Ldthis();
Ldc((int)options);
Stfld(RegexField(nameof(Regex.roptions)));
Ldthis();
_ilg!.Emit(OpCodes.Newobj, regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!);
Stfld(RegexField(nameof(Regex.factory)));

// Store the timeout (no need to validate as it should have happened in RegexCompilationInfo)
// The `this` reference loaded here is consumed by the Stfld that follows the if/else.
Ldthis();
if (matchTimeout == Regex.InfiniteMatchTimeout)
{
// base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
_ilg.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
}
else
{
// base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
LdcI8(matchTimeout.Ticks);
Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
}
Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

// Set capsize, caps, capnames, capslist.
// capsize is always stored; the other three are only emitted when the tree supplies them.
Ldthis();
Ldc(tree.CaptureCount);
Stfld(RegexField(nameof(Regex.capsize)));
if (tree.CaptureNumberSparseMapping != null)
{
// Caps = new Hashtable {{0, 0}, {1, 1}, ... };
GenerateCreateHashtable(RegexField(nameof(Regex.caps)), tree.CaptureNumberSparseMapping);
}
if (tree.CaptureNameToNumberMapping != null)
{
// CapNames = new Hashtable {{"0", 0}, {"1", 1}, ...};
GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), tree.CaptureNameToNumberMapping);
}
if (tree.CaptureNames != null)
{
// capslist = new string[...];
// capslist[0] = "0";
// capslist[1] = "1";
// ...
Ldthis();
Ldc(tree.CaptureNames.Length);
_ilg.Emit(OpCodes.Newarr, typeof(string)); // create new string array
FieldInfo capslistField = RegexField(nameof(Regex.capslist));
Stfld(capslistField);
// The array is re-read from the field for every element rather than being kept on the stack.
for (int i = 0; i < tree.CaptureNames.Length; i++)
{
Ldthisfld(capslistField);
Ldc(i);
Ldstr(tree.CaptureNames[i]);
_ilg.Emit(OpCodes.Stelem_Ref);
}
}

// return;
Ret();
}

/// <summary>
/// Emits the body of the generated constructor that accepts a TimeSpan: it chains to the
/// generated parameterless constructor, validates the argument, and then overwrites the
/// stored match timeout with it.
/// </summary>
private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
{
    // this..ctor();
    Ldthis();
    _ilg!.Emit(OpCodes.Call, defaultCtorBuilder);

    // Regex.ValidateMatchTimeout(timeSpan);
    _ilg.Emit(OpCodes.Ldarg_1);
    MethodInfo validateTimeout = typeof(Regex).GetMethod(
        nameof(Regex.ValidateMatchTimeout),
        BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!;
    Call(validateTimeout);

    // base.internalMatchTimeout = timeSpan;
    Ldthis();
    _ilg.Emit(OpCodes.Ldarg_1);
    FieldInfo timeoutField = RegexField(nameof(Regex.internalMatchTimeout));
    Stfld(timeoutField);

    // return;
    Ret();
}

/// <summary>
/// Emits IL that assigns a new Hashtable to <paramref name="field"/> on <c>this</c> and then
/// adds every entry of <paramref name="ht"/> to it. Keys are emitted as boxed ints when they
/// are ints and as string literals otherwise; values are always emitted as boxed ints.
/// </summary>
internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
{
    // this.field = new Hashtable();
    Ldthis();
    ConstructorInfo hashtableCtor = typeof(Hashtable).GetConstructor(Type.EmptyTypes)!;
    _ilg!.Emit(OpCodes.Newobj, hashtableCtor);
    Stfld(field);

    // One "this.field.Add(key, value);" call is emitted per entry.
    MethodInfo add = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance)!;
    foreach (DictionaryEntry entry in ht)
    {
        Ldthisfld(field);

        // Key: boxed int or string literal.
        if (entry.Key is int intKey)
        {
            Ldc(intKey);
            _ilg!.Emit(OpCodes.Box, typeof(int));
        }
        else
        {
            Ldstr((string)entry.Key);
        }

        // Value: boxed int.
        Ldc((int)entry.Value!);
        _ilg!.Emit(OpCodes.Box, typeof(int));

        Callvirt(add);
    }
}

/// <summary>Looks up a field on the Regex type by name, searching public and non-public, instance and static fields.</summary>
private static FieldInfo RegexField(string fieldname)
{
    const BindingFlags AnyField =
        BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance | BindingFlags.Static;

    FieldInfo? found = typeof(Regex).GetField(fieldname, AnyField);
    return found!;
}

/// <summary>Saves the assembly to <paramref name="fileName"/>, appending ".dll" first when the name does not already end with it.</summary>
internal void Save(string fileName)
{
    string target = fileName.EndsWith(".dll", StringComparison.Ordinal)
        ? fileName
        : fileName + ".dll";

    // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
    _save.Invoke(_assembly, new object?[] { target });
}

/// <summary>
/// Begins the definition of a new class in <paramref name="moduleBuilder"/> that derives from
/// <paramref name="inheritFromClass"/>, with the requested visibility and sealed-ness.
/// </summary>
private static TypeBuilder DefineType(
    ModuleBuilder moduleBuilder,
    string typeName,
    bool isPublic,
    bool isSealed,
    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
{
    TypeAttributes visibility = isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic;
    TypeAttributes sealedness = isSealed ? TypeAttributes.Sealed : default(TypeAttributes);

    return moduleBuilder.DefineType(
        typeName,
        TypeAttributes.Class | TypeAttributes.BeforeFieldInit | visibility | sealedness,
        inheritFromClass);
}

/// <summary>
/// Begins the definition of a new protected virtual method with the given parameter and return
/// types, handing back both the method builder and an IL generator for its body.
/// </summary>
private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
{
    const MethodAttributes ProtectedVirtual = MethodAttributes.Family | MethodAttributes.Virtual;

    MethodBuilder defined = typeBuilder.DefineMethod(methname, ProtectedVirtual, returnType, parameterTypes);
    builder = defined;
    return defined.GetILGenerator();
}
}
}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -5305,7 +5305,7 @@ void EmitStackPop()
}
}

protected void EmitScan(RegexOptions options, DynamicMethod tryFindNextStartingPositionMethod, DynamicMethod tryMatchAtCurrentPositionMethod)
protected void EmitScan(RegexOptions options, MethodInfo tryFindNextStartingPositionMethod, MethodInfo tryMatchAtCurrentPositionMethod)
{
// As with the source generator, we can emit special code for common circumstances rather than always emitting
// the most general purpose scan loop. Unlike the source generator, however, code appearance isn't important
Expand Down
Loading

0 comments on commit b4ec422

Please sign in to comment.