Skip to content

Commit

Permalink
Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAsse…
Browse files Browse the repository at this point in the history
…mbly

To aid in debugging RegexCompiler issues and to help vet the new AssemblyBuilder.Save support.
  • Loading branch information
stephentoub committed Jan 8, 2024
1 parent 499e288 commit 6aadb60
Show file tree
Hide file tree
Showing 6 changed files with 345 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
<Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.EnumerateMatches.cs" />
<Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
<Compile Include="System\Text\RegularExpressions\RegexAssemblyCompiler.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseBehavior.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.Data.cs" />
<Compile Include="System\Text\RegularExpressions\RegexCaseEquivalences.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,41 @@ public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, Assembly
CompileToAssembly(regexinfos, assemblyname, attributes, null);

[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile) =>
public static void CompileToAssembly(RegexCompilationInfo[] regexinfos, AssemblyName assemblyname, CustomAttributeBuilder[]? attributes, string? resourceFile)
{
#if DEBUG
    // Debug-only path that exists purely to aid development of the RegexCompiler.
    // CompileToAssembly is no longer a supported .NET feature; the source generator
    // is the replacement. Release builds take the #else branch and always throw.
#pragma warning disable IL3050
    ArgumentNullException.ThrowIfNull(assemblyname);
    ArgumentNullException.ThrowIfNull(regexinfos);

    var compiler = new RegexAssemblyCompiler(assemblyname, attributes, resourceFile);

    for (int i = 0; i < regexinfos.Length; i++)
    {
        // Validate the element through the indexer expression so the reported
        // argument name stays "regexinfos[i]".
        ArgumentNullException.ThrowIfNull(regexinfos[i]);
        RegexCompilationInfo info = regexinfos[i];

        string pattern = info.Pattern;

        // Always force Compiled: it enables additional optimizations specific to compilation.
        RegexOptions options = info.Options | RegexOptions.Compiled;

        // Qualify the generated type name with the namespace only when one was supplied.
        string ns = info.Namespace;
        string fullname = ns.Length == 0 ? info.Name : ns + "." + info.Name;

        // CultureInvariant selects the invariant culture; otherwise the current culture is used for parsing.
        CultureInfo culture;
        if ((options & RegexOptions.CultureInvariant) != 0)
        {
            culture = CultureInfo.InvariantCulture;
        }
        else
        {
            culture = CultureInfo.CurrentCulture;
        }

        RegexTree tree = RegexParser.Parse(pattern, options, culture);
        RegexInterpreterCode code = RegexWriter.Write(tree);

        compiler.GenerateRegexType(pattern, options, fullname, info.IsPublic, tree, code, info.MatchTimeout);
    }

    // Fall back to a fixed file name when the assembly name carries no simple name.
    string outputName = assemblyname.Name ?? "RegexCompileToAssembly";
    compiler.Save(outputName);
#pragma warning restore IL3050
#else
    throw new PlatformNotSupportedException(SR.PlatformNotSupported_CompileToAssembly);
#endif
}

/// <summary>
/// Escapes a minimal set of metacharacters (\, *, +, ?, |, {, [, (, ), ^, $, ., #, and
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,273 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Reflection;
using System.Reflection.Emit;
using System.Threading;

// WARNING:
// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later.
// It accesses internal and private members of System.Text.RegularExpressions, which may change
// at any point in the future, and relies on details about the current machine/process, e.g. is
// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists
// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits.

#if DEBUG
namespace System.Text.RegularExpressions
{
/// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary>
[RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")]
internal sealed class RegexAssemblyCompiler : RegexCompiler
{
/// <summary>Type count used to augment generated type names to create unique names.</summary>
private static int s_typeCount;

private readonly AssemblyBuilder _assembly;
private readonly ModuleBuilder _module;
private readonly MethodInfo _save;

/// <summary>Creates the persisted assembly builder and its single module that generated regex types are emitted into.</summary>
/// <param name="an">Name of the assembly to define; its simple name plus ".dll" becomes the module name.</param>
/// <param name="attribs">Optional custom attributes handed to the assembly definition.</param>
/// <param name="resourceFile">Must be null; unmanaged resources are not supported.</param>
/// <exception cref="PlatformNotSupportedException"><paramref name="resourceFile"/> is not null.</exception>
/// <exception cref="InvalidOperationException">A required non-public Reflection.Emit member could not be found, or the assembly definition returned null.</exception>
internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
{
    // Unmanaged resources are not supported, so any resource file is rejected before doing other work.
    if (resourceFile is not null)
    {
        throw new PlatformNotSupportedException();
    }

    // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
    // Until then, the persistable builder is reached through reflection over non-public members.
    Type builderImplType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;

    Type[] defineSignature = [typeof(AssemblyName), typeof(Assembly), typeof(List<CustomAttributeBuilder>)];
    MethodInfo? definePersisted = builderImplType.GetMethod(
        "DefinePersistedAssembly",
        BindingFlags.NonPublic | BindingFlags.Static,
        defineSignature);
    if (definePersisted is null)
    {
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.DefinePersistedAssembly");
    }

    // The target method takes a List<CustomAttributeBuilder>, so copy the array when one was given.
    List<CustomAttributeBuilder>? attributeList = attribs is null ? null : new List<CustomAttributeBuilder>(attribs);
    object? created = definePersisted.Invoke(null, [an, typeof(object).Assembly, attributeList]);
    _assembly = (AssemblyBuilder?)created ??
        throw new InvalidOperationException("DefinePersistedAssembly returned null");

    // Resolve Save once here; it is invoked later when the assembly is written out.
    _save = builderImplType.GetMethod("Save", BindingFlags.NonPublic | BindingFlags.Instance, [typeof(string)]) ??
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.Save");

    _module = _assembly.DefineDynamicModule(an.Name + ".dll");
}

/// <summary>
/// Emits three types into the module for one regex: a RegexRunner-derived type, a
/// RegexRunnerFactory-derived type that instantiates it, and a Regex-derived type named <paramref name="name"/>.
/// </summary>
/// <param name="pattern">The regex pattern stored by the generated constructor.</param>
/// <param name="options">The regex options stored by the generated constructor and used during emission.</param>
/// <param name="name">Full name of the generated Regex-derived type; also the prefix of the helper type names.</param>
/// <param name="isPublic">Whether the generated Regex-derived type is public.</param>
/// <param name="tree">The parsed regex tree to generate code for.</param>
/// <param name="code">Interpreter code, passed through to the default constructor generator.</param>
/// <param name="matchTimeout">Match timeout to bake in; a non-infinite value also adds a TimeSpan-taking constructor.</param>
internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
{
    // Publish the inputs through the base type's fields.
    _regexTree = tree;
    _options = options;

    // A process-wide counter keeps the helper type names unique.
    uint typeId = (uint)Interlocked.Increment(ref s_typeCount);
    string suffix = typeId.ToString();

    // --- RegexRunner-derived type ---
    TypeBuilder runnerBuilder = DefineType(_module, name + "Runner" + suffix, isPublic: false, isSealed: true, typeof(RegexRunner));

    _ilg = DefineMethod(runnerBuilder, "TryFindNextPossibleStartingPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder findStartMethod);
    EmitTryFindNextPossibleStartingPosition();

    _ilg = DefineMethod(runnerBuilder, "TryMatchAtCurrentPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out MethodBuilder matchAtMethod);
    EmitTryMatchAtCurrentPosition();

    _ilg = DefineMethod(runnerBuilder, "Scan", [typeof(ReadOnlySpan<char>)], null, out _);
    EmitScan(options, findStartMethod, matchAtMethod);

    Type runnerType = runnerBuilder.CreateType()!;

    // --- RegexRunnerFactory-derived type ---
    TypeBuilder factoryBuilder = DefineType(_module, name + "Factory" + suffix, isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
    _ilg = DefineMethod(factoryBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
    GenerateCreateInstance(runnerType);
    Type factoryType = factoryBuilder.CreateType()!;

    // --- Regex-derived type ---
    TypeBuilder regexBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
    ConstructorBuilder parameterlessCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
    _ilg = parameterlessCtor.GetILGenerator();
    GenerateRegexDefaultCtor(pattern, options, factoryType, tree, code, matchTimeout);

    // A constructor taking a timeout is only emitted when the supplied timeout is finite.
    // With an infinite timeout the generated code does not respect timeouts at all (a
    // difference from netfx), because the code generator special-cases the infinite
    // timeout to avoid emitting unnecessary code and paying for the checks at run time.
    bool hasFiniteTimeout = matchTimeout != Regex.InfiniteMatchTimeout;
    if (hasFiniteTimeout)
    {
        ConstructorBuilder timeoutCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(TimeSpan)]);
        _ilg = timeoutCtor.GetILGenerator();
        GenerateRegexTimeoutCtor(parameterlessCtor, regexBuilder);
    }

    regexBuilder.CreateType();
}

/// <summary>Emits the body of a factory method that constructs <paramref name="type"/> and returns it.</summary>
/// <param name="type">The type to instantiate through its public parameterless constructor.</param>
private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
{
    // Emitted IL is equivalent to: return new Type();
    ConstructorInfo parameterlessCtor = type.GetConstructor(Type.EmptyTypes)!;
    _ilg!.Emit(OpCodes.Newobj, parameterlessCtor);
    Ret();
}

/// <summary>
/// Emits the parameterless constructor of the generated Regex-derived type: it calls the base
/// constructor and then stores the pattern, options, runner factory, match timeout and capture
/// metadata into the corresponding Regex fields.
/// </summary>
/// <param name="pattern">Pattern string to store.</param>
/// <param name="options">Options to store (emitted as their integer value).</param>
/// <param name="regexRunnerFactoryType">Factory type instantiated through its public parameterless constructor.</param>
/// <param name="tree">Source of the capture count, capture mappings and capture names.</param>
/// <param name="code">Not read by this method.</param>
/// <param name="matchTimeout">Timeout to store; the infinite value is loaded from Regex.InfiniteMatchTimeout instead of being rebuilt from ticks.</param>
private void GenerateRegexDefaultCtor(
    string pattern,
    RegexOptions options,
    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
    RegexTree tree,
    RegexInterpreterCode code,
    TimeSpan matchTimeout)
{
    ILGenerator il = _ilg!;

    // base.ctor();
    Ldthis();
    ConstructorInfo baseCtor = typeof(Regex).GetConstructor(
        BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance,
        null,
        Type.EmptyTypes,
        Array.Empty<ParameterModifier>())!;
    il.Emit(OpCodes.Call, baseCtor);

    // base.pattern = pattern;
    Ldthis();
    Ldstr(pattern);
    Stfld(RegexField(nameof(Regex.pattern)));

    // base.options = options;
    Ldthis();
    Ldc((int)options);
    Stfld(RegexField(nameof(Regex.roptions)));

    // base.factory = new DerivedRegexRunnerFactory();
    Ldthis();
    ConstructorInfo factoryCtor = regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!;
    il.Emit(OpCodes.Newobj, factoryCtor);
    Stfld(RegexField(nameof(Regex.factory)));

    // Store the timeout (no validation here; it should already have happened in RegexCompilationInfo).
    Ldthis();
    if (matchTimeout != Regex.InfiniteMatchTimeout)
    {
        // base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
        LdcI8(matchTimeout.Ticks);
        Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static)!);
    }
    else
    {
        // base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
        il.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
    }
    Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

    // base.capsize = <capture count>;
    Ldthis();
    Ldc(tree.CaptureCount);
    Stfld(RegexField(nameof(Regex.capsize)));

    // caps = new Hashtable {{0, 0}, {1, 1}, ... };
    if (tree.CaptureNumberSparseMapping is { } sparseMapping)
    {
        GenerateCreateHashtable(RegexField(nameof(Regex.caps)), sparseMapping);
    }

    // capnames = new Hashtable {{"0", 0}, {"1", 1}, ...};
    if (tree.CaptureNameToNumberMapping is { } nameMapping)
    {
        GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), nameMapping);
    }

    // capslist = new string[...]; capslist[0] = "0"; capslist[1] = "1"; ...
    string[]? captureNames = tree.CaptureNames;
    if (captureNames is not null)
    {
        FieldInfo capslistField = RegexField(nameof(Regex.capslist));

        Ldthis();
        Ldc(captureNames.Length);
        il.Emit(OpCodes.Newarr, typeof(string)); // create new string array
        Stfld(capslistField);

        int slot = 0;
        foreach (string captureName in captureNames)
        {
            Ldthisfld(capslistField);
            Ldc(slot);
            Ldstr(captureName);
            il.Emit(OpCodes.Stelem_Ref);
            slot++;
        }
    }

    // return;
    Ret();
}

/// <summary>
/// Emits the constructor that accepts a TimeSpan: it chains to the generated parameterless
/// constructor, validates the supplied timeout, and then stores it in Regex.internalMatchTimeout.
/// </summary>
/// <param name="defaultCtorBuilder">The generated parameterless constructor to chain to.</param>
/// <param name="regexTypeBuilder">Not read by this method.</param>
private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
{
    ILGenerator il = _ilg!;

    // this..ctor();  (the parameterless constructor generated for this same type)
    Ldthis();
    il.Emit(OpCodes.Call, defaultCtorBuilder);

    // ValidateMatchTimeout(timeSpan);
    il.Emit(OpCodes.Ldarg_1);
    MethodInfo validate = typeof(Regex).GetMethod(
        nameof(Regex.ValidateMatchTimeout),
        BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!;
    Call(validate);

    // base.internalMatchTimeout = timeSpan;
    Ldthis();
    il.Emit(OpCodes.Ldarg_1);
    Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

    Ret();
}

/// <summary>
/// Emits IL that assigns a new Hashtable to <paramref name="field"/> on the instance being
/// constructed and then adds every entry of <paramref name="ht"/> to it.
/// </summary>
/// <param name="field">The field that receives the new Hashtable.</param>
/// <param name="ht">Entries to replicate; keys are emitted as boxed ints or as strings, values as boxed ints.</param>
internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
{
    ILGenerator il = _ilg!;

    // this.field = new Hashtable();
    Ldthis();
    il.Emit(OpCodes.Newobj, typeof(Hashtable).GetConstructor(Type.EmptyTypes)!);
    Stfld(field);

    // this.field.Add(key1, value1);
    // this.field.Add(key2, value2);
    // ...
    MethodInfo add = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance)!;
    foreach (DictionaryEntry entry in ht)
    {
        Ldthisfld(field);

        // Key: an int is loaded and boxed; anything else is treated as a string.
        if (entry.Key is int numericKey)
        {
            Ldc(numericKey);
            il.Emit(OpCodes.Box, typeof(int));
        }
        else
        {
            Ldstr((string)entry.Key);
        }

        // Value: always an int, boxed to match Hashtable.Add(object, object).
        Ldc((int)entry.Value!);
        il.Emit(OpCodes.Box, typeof(int));

        Callvirt(add);
    }
}

/// <summary>Looks up a field of the Regex type by name, matching instance or static fields of any accessibility.</summary>
/// <param name="fieldname">Name of the field to find.</param>
/// <returns>The matching field; the lookup result is returned as-is, so it is null at run time when no such field exists.</returns>
private static FieldInfo RegexField(string fieldname)
{
    const BindingFlags AnyField =
        BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic;

    FieldInfo? match = typeof(Regex).GetField(fieldname, AnyField);
    return match!;
}

/// <summary>Saves the assembly to the given file, appending a ".dll" extension when the name does not already end with one.</summary>
/// <param name="fileName">
/// Target file name. The extension check ignores case, so a name such as "Foo.DLL" is used as-is
/// rather than becoming "Foo.DLL.dll".
/// </param>
/// <remarks>NOTE(review): a relative name is presumably resolved against the current directory by the underlying Save - confirm.</remarks>
internal void Save(string fileName)
{
    // Extensions are compared case-insensitively so that an existing ".DLL"/".Dll" suffix is not doubled.
    if (!fileName.EndsWith(".dll", StringComparison.OrdinalIgnoreCase))
    {
        fileName += ".dll";
    }

    // Save is non-public and was resolved by reflection in the constructor.
    _save.Invoke(_assembly, [fileName]); // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
}

/// <summary>Starts the definition of a new class in <paramref name="moduleBuilder"/> that derives from <paramref name="inheritFromClass"/>.</summary>
/// <param name="moduleBuilder">Module that will contain the type.</param>
/// <param name="typeName">Full name of the new type.</param>
/// <param name="isPublic">True for a public type; false for a non-public one.</param>
/// <param name="isSealed">True to mark the type sealed.</param>
/// <param name="inheritFromClass">Base class of the new type.</param>
/// <returns>The builder for the newly defined type.</returns>
private static TypeBuilder DefineType(
    ModuleBuilder moduleBuilder,
    string typeName,
    bool isPublic,
    bool isSealed,
    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
{
    TypeAttributes visibility = isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic;
    TypeAttributes sealing = isSealed ? TypeAttributes.Sealed : default;

    // Every generated type is a BeforeFieldInit class; visibility and sealing vary per caller.
    TypeAttributes typeAttributes = TypeAttributes.Class | TypeAttributes.BeforeFieldInit | visibility | sealing;

    return moduleBuilder.DefineType(typeName, typeAttributes, inheritFromClass);
}

/// <summary>Starts the definition of a new protected virtual method with the given parameter and return types.</summary>
/// <param name="typeBuilder">Type that will own the method.</param>
/// <param name="methname">Name of the method.</param>
/// <param name="parameterTypes">Parameter types, or null for none.</param>
/// <param name="returnType">Return type, or null when the method returns nothing.</param>
/// <param name="builder">Receives the builder for the newly defined method.</param>
/// <returns>The IL generator for the new method's body.</returns>
private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
{
    // Family | Virtual == protected virtual.
    const MethodAttributes ProtectedVirtual = MethodAttributes.Family | MethodAttributes.Virtual;

    MethodBuilder defined = typeBuilder.DefineMethod(methname, ProtectedVirtual, returnType, parameterTypes);
    builder = defined;
    return defined.GetILGenerator();
}
}
}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -5305,7 +5305,7 @@ void EmitStackPop()
}
}

protected void EmitScan(RegexOptions options, DynamicMethod tryFindNextStartingPositionMethod, DynamicMethod tryMatchAtCurrentPositionMethod)
protected void EmitScan(RegexOptions options, MethodInfo tryFindNextStartingPositionMethod, MethodInfo tryMatchAtCurrentPositionMethod)
{
// As with the source generator, we can emit special code for common circumstances rather than always emitting
// the most general purpose scan loop. Unlike the source generator, however, code appearance isn't important
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ internal sealed class RegexLWCGCompiler : RegexCompiler
/// </remarks>
private static readonly bool s_includePatternInName = Environment.GetEnvironmentVariable(IncludePatternInNamesEnvVar) == "1";

/// <summary>Parameter types for the generated Go and FindFirstChar methods.</summary>
/// <summary>Parameter types for the generated TryFindNextPossibleStartingPosition and TryMatchAtCurrentPosition methods.</summary>
private static readonly Type[] s_paramTypes = [typeof(RegexRunner), typeof(ReadOnlySpan<char>)];

/// <summary>Id number to use for the next compiled regex.</summary>
Expand Down
Loading

0 comments on commit 6aadb60

Please sign in to comment.