-
Notifications
You must be signed in to change notification settings - Fork 4.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add debug-only use of new AssemblyBuilder.Save in Regex.CompileToAssembly
To aid in debugging RegexCompiler issues and to help vet the new AssemblyBuilder.Save support.
- Loading branch information
1 parent
499e288
commit 6aadb60
Showing
6 changed files
with
345 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
273 changes: 273 additions & 0 deletions
273
...ystem.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexAssemblyCompiler.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,273 @@ | ||
// Licensed to the .NET Foundation under one or more agreements. | ||
// The .NET Foundation licenses this file to you under the MIT license. | ||
|
||
using System.Collections; | ||
using System.Collections.Generic; | ||
using System.Diagnostics.CodeAnalysis; | ||
using System.Reflection; | ||
using System.Reflection.Emit; | ||
using System.Threading; | ||
|
||
// WARNING: | ||
// The code generated by RegexCompiler is not intended to be saved to disk and loaded back later. | ||
// It accesses internal and private members of System.Text.RegularExpressions, which may change | ||
// at any point in the future, and relies on details about the current machine/process, e.g. is | ||
// it 32-bit or 64-bit. The generated surface area has also not been vetted. This code exists | ||
// only for debugging purposes, to make it easier to examine the IL that RegexCompiler emits. | ||
|
||
#if DEBUG | ||
namespace System.Text.RegularExpressions | ||
{ | ||
/// <summary>Compiles a Regex to an assembly that can be saved to disk.</summary> | ||
[RequiresDynamicCode("The RegexAssemblyCompiler type requires dynamic code to be enabled.")] | ||
internal sealed class RegexAssemblyCompiler : RegexCompiler | ||
{ | ||
/// <summary>Type count used to augment generated type names to create unique names.</summary> | ||
private static int s_typeCount; | ||
|
||
/// <summary>Assembly builder obtained in the constructor from the non-public AssemblyBuilderImpl.DefinePersistedAssembly method.</summary> | ||
private readonly AssemblyBuilder _assembly; | ||
/// <summary>Module defined on <see cref="_assembly"/> in the constructor; every generated type is defined in it.</summary> | ||
private readonly ModuleBuilder _module; | ||
/// <summary>Reflection handle to the non-public instance method AssemblyBuilderImpl.Save(string), invoked by <see cref="Save"/>.</summary> | ||
private readonly MethodInfo _save; | ||
|
||
/// <summary>
/// Creates the persistable dynamic assembly, and the single module inside it, that generated regex types are emitted into.
/// </summary>
/// <param name="an">Name of the assembly to define; its <see cref="AssemblyName.Name"/> followed by ".dll" is used as the module name.</param>
/// <param name="attribs">Optional custom attributes handed to the assembly definition; may be null.</param>
/// <param name="resourceFile">Must be null, because unmanaged resources are not supported.</param>
/// <exception cref="PlatformNotSupportedException"><paramref name="resourceFile"/> is not null.</exception>
/// <exception cref="InvalidOperationException">
/// A required non-public member of AssemblyBuilderImpl could not be found, or defining the assembly returned null.
/// </exception>
internal RegexAssemblyCompiler(AssemblyName an, CustomAttributeBuilder[]? attribs, string? resourceFile)
{
    // Unmanaged resources are not supported: _assembly.DefineUnmanagedResource(resourceFile);
    if (resourceFile is not null)
    {
        throw new PlatformNotSupportedException();
    }

    // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
    // Until then the persisted-assembly support is reached through reflection over non-public members.
    Type builderImplType = Type.GetType("System.Reflection.Emit.AssemblyBuilderImpl, System.Reflection.Emit", throwOnError: true)!;

    Type[] defineSignature = [typeof(AssemblyName), typeof(Assembly), typeof(List<CustomAttributeBuilder>)];
    MethodInfo? definePersisted = builderImplType.GetMethod("DefinePersistedAssembly", BindingFlags.NonPublic | BindingFlags.Static, defineSignature);
    if (definePersisted is null)
    {
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.DefinePersistedAssembly");
    }

    // The reflected method takes a List<CustomAttributeBuilder>, so the array is copied into one (or null is passed through).
    List<CustomAttributeBuilder>? attributeList = attribs is null ? null : new List<CustomAttributeBuilder>(attribs);
    object?[] defineArguments = [an, typeof(object).Assembly, attributeList];
    AssemblyBuilder? definedAssembly = (AssemblyBuilder?)definePersisted.Invoke(null, defineArguments);
    if (definedAssembly is null)
    {
        throw new InvalidOperationException("DefinePersistedAssembly returned null");
    }
    _assembly = definedAssembly;

    MethodInfo? saveMethod = builderImplType.GetMethod("Save", BindingFlags.NonPublic | BindingFlags.Instance, [typeof(string)]);
    if (saveMethod is null)
    {
        throw new InvalidOperationException("Could not find method AssemblyBuilderImpl.Save");
    }
    _save = saveMethod;

    _module = _assembly.DefineDynamicModule($"{an.Name}.dll");
}
|
||
/// <summary>
/// Emits three types into the module: a sealed RegexRunner-derived type, a sealed RegexRunnerFactory-derived type
/// that instantiates it, and a Regex-derived type named <paramref name="name"/> whose constructors wire everything up.
/// </summary>
/// <param name="pattern">Pattern string stored by the generated constructor.</param>
/// <param name="options">Options stored by the generated constructor and passed to the scan emitter.</param>
/// <param name="name">Name of the generated Regex-derived type and prefix of the two helper type names.</param>
/// <param name="isPublic">Whether the generated Regex-derived type is public; the helper types are always non-public.</param>
/// <param name="tree">Parsed regex tree; stored in the base type's field and forwarded to the constructor generator.</param>
/// <param name="code">Interpreter code; only forwarded to the constructor generator.</param>
/// <param name="matchTimeout">Timeout baked into the generated type; a timeout-taking constructor is emitted only when it is not infinite.</param>
internal void GenerateRegexType(string pattern, RegexOptions options, string name, bool isPublic, RegexTree tree, RegexInterpreterCode code, TimeSpan matchTimeout)
{
    // Store the arguments into the base type's fields.
    _options = options;
    _regexTree = tree;

    // Numeric suffix that keeps the helper type names distinct across calls.
    string uniqueSuffix = ((uint)Interlocked.Increment(ref s_typeCount)).ToString();

    // ---- RegexRunner-derived type ----
    TypeBuilder runnerBuilder = DefineType(_module, name + "Runner" + uniqueSuffix, isPublic: false, isSealed: true, typeof(RegexRunner));

    MethodBuilder findNextStartMethod;
    _ilg = DefineMethod(runnerBuilder, "TryFindNextPossibleStartingPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out findNextStartMethod);
    EmitTryFindNextPossibleStartingPosition();

    MethodBuilder matchAtCurrentMethod;
    _ilg = DefineMethod(runnerBuilder, "TryMatchAtCurrentPosition", [typeof(ReadOnlySpan<char>)], typeof(bool), out matchAtCurrentMethod);
    EmitTryMatchAtCurrentPosition();

    // Scan is emitted last because it is handed the two methods defined above.
    _ilg = DefineMethod(runnerBuilder, "Scan", [typeof(ReadOnlySpan<char>)], null, out _);
    EmitScan(options, findNextStartMethod, matchAtCurrentMethod);

    Type runnerType = runnerBuilder.CreateType()!;

    // ---- RegexRunnerFactory-derived type ----
    TypeBuilder factoryBuilder = DefineType(_module, name + "Factory" + uniqueSuffix, isPublic: false, isSealed: true, typeof(RegexRunnerFactory));
    _ilg = DefineMethod(factoryBuilder, "CreateInstance", null, typeof(RegexRunner), out _);
    GenerateCreateInstance(runnerType);
    Type factoryType = factoryBuilder.CreateType()!;

    // ---- Regex-derived type ----
    TypeBuilder regexBuilder = DefineType(_module, name, isPublic, isSealed: false, typeof(Regex));
    ConstructorBuilder parameterlessCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, Type.EmptyTypes);
    _ilg = parameterlessCtor.GetILGenerator();
    GenerateRegexDefaultCtor(pattern, options, factoryType, tree, code, matchTimeout);

    // A constructor taking a timeout is only generated when the supplied regex information has a non-infinite timeout.
    // With an infinite timeout the generated code does not respect a timeout at all: the code generator special-cases
    // the infinite value to avoid emitting unnecessary code and paying for the checks at run time. This differs from netfx.
    bool hasFiniteTimeout = matchTimeout != Regex.InfiniteMatchTimeout;
    if (hasFiniteTimeout)
    {
        ConstructorBuilder timeoutCtor = regexBuilder.DefineConstructor(MethodAttributes.Public, CallingConventions.Standard, [typeof(TimeSpan)]);
        _ilg = timeoutCtor.GetILGenerator();
        GenerateRegexTimeoutCtor(parameterlessCtor, regexBuilder);
    }

    regexBuilder.CreateType();
}
|
||
/// <summary>Emits the body of a factory method that constructs <paramref name="type"/> and returns it.</summary>
/// <param name="type">Type to instantiate through its public parameterless constructor.</param>
private void GenerateCreateInstance([DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type type)
{
    // Emitted method body: return new Type();
    ConstructorInfo parameterlessCtor = type.GetConstructor(Type.EmptyTypes)!;
    _ilg!.Emit(OpCodes.Newobj, parameterlessCtor);
    Ret();
}
|
||
/// <summary>
/// Emits the body of the generated Regex-derived type's parameterless constructor: it calls the Regex base
/// constructor and then stores the pattern, options, runner factory, match timeout and capture metadata
/// into the corresponding Regex fields.
/// </summary>
/// <param name="pattern">Pattern string embedded as a literal in the generated constructor.</param>
/// <param name="options">Options embedded as an integer constant in the generated constructor.</param>
/// <param name="regexRunnerFactoryType">Factory type whose parameterless constructor the generated code invokes.</param>
/// <param name="tree">Source of the capture count, capture-number mapping, capture-name mapping and capture names.</param>
/// <param name="code">Not used by this method.</param>
/// <param name="matchTimeout">Timeout stored by the generated constructor; the infinite value is loaded from the static field instead of being rebuilt from ticks.</param>
private void GenerateRegexDefaultCtor(
    string pattern,
    RegexOptions options,
    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.PublicParameterlessConstructor)] Type regexRunnerFactoryType,
    RegexTree tree,
    RegexInterpreterCode code,
    TimeSpan matchTimeout)
{
    // Call the base ctor and store pattern, options, and factory.
    // base.ctor();
    // base.pattern = pattern;
    // base.options = options;
    // base.factory = new DerivedRegexRunnerFactory();
    Ldthis();
    _ilg!.Emit(OpCodes.Call, typeof(Regex).GetConstructor(BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance, null, Type.EmptyTypes, Array.Empty<ParameterModifier>())!);
    Ldthis();
    Ldstr(pattern);
    Stfld(RegexField(nameof(Regex.pattern)));
    Ldthis();
    Ldc((int)options);
    Stfld(RegexField(nameof(Regex.roptions)));
    Ldthis();
    _ilg!.Emit(OpCodes.Newobj, regexRunnerFactoryType.GetConstructor(Type.EmptyTypes)!);
    Stfld(RegexField(nameof(Regex.factory)));

    // Store the timeout (no need to validate as it should have happened in RegexCompilationInfo)
    Ldthis();
    if (matchTimeout == Regex.InfiniteMatchTimeout)
    {
        // base.internalMatchTimeout = Regex.InfiniteMatchTimeout;
        _ilg.Emit(OpCodes.Ldsfld, RegexField(nameof(Regex.InfiniteMatchTimeout)));
    }
    else
    {
        // base.internalMatchTimeout = TimeSpan.FromTicks(matchTimeout.Ticks);
        LdcI8(matchTimeout.Ticks);

        // Pin the (long) signature explicitly so the lookup cannot become ambiguous if TimeSpan
        // ever gains another FromTicks overload; a name-only lookup would then throw.
        Call(typeof(TimeSpan).GetMethod(nameof(TimeSpan.FromTicks), BindingFlags.Public | BindingFlags.Static, [typeof(long)])!);
    }
    Stfld(RegexField(nameof(Regex.internalMatchTimeout)));

    // Set capsize, caps, capnames, capslist.
    Ldthis();
    Ldc(tree.CaptureCount);
    Stfld(RegexField(nameof(Regex.capsize)));
    if (tree.CaptureNumberSparseMapping != null)
    {
        // Caps = new Hashtable {{0, 0}, {1, 1}, ... };
        GenerateCreateHashtable(RegexField(nameof(Regex.caps)), tree.CaptureNumberSparseMapping);
    }
    if (tree.CaptureNameToNumberMapping != null)
    {
        // CapNames = new Hashtable {{"0", 0}, {"1", 1}, ...};
        GenerateCreateHashtable(RegexField(nameof(Regex.capnames)), tree.CaptureNameToNumberMapping);
    }
    if (tree.CaptureNames != null)
    {
        // capslist = new string[...];
        // capslist[0] = "0";
        // capslist[1] = "1";
        // ...
        Ldthis();
        Ldc(tree.CaptureNames.Length);
        _ilg.Emit(OpCodes.Newarr, typeof(string)); // create new string array
        FieldInfo capslistField = RegexField(nameof(Regex.capslist));
        Stfld(capslistField);
        for (int i = 0; i < tree.CaptureNames.Length; i++)
        {
            // Each element store reloads the array from the field: load array, load index, load literal, store.
            Ldthisfld(capslistField);
            Ldc(i);
            Ldstr(tree.CaptureNames[i]);
            _ilg.Emit(OpCodes.Stelem_Ref);
        }
    }

    // return;
    Ret();
}
|
||
/// <summary>
/// Emits the body of the generated constructor that takes a TimeSpan: it chains to the generated parameterless
/// constructor, validates the argument, and then stores it as the match timeout.
/// </summary>
/// <param name="defaultCtorBuilder">The generated parameterless constructor that the emitted code calls first.</param>
/// <param name="regexTypeBuilder">Not used by this method.</param>
private void GenerateRegexTimeoutCtor(ConstructorBuilder defaultCtorBuilder, TypeBuilder regexTypeBuilder)
{
    ILGenerator il = _ilg!;

    // this.ctor();
    Ldthis();
    il.Emit(OpCodes.Call, defaultCtorBuilder);

    // ValidateMatchTimeout(timeSpan);
    il.Emit(OpCodes.Ldarg_1);
    MethodInfo validateTimeout = typeof(Regex).GetMethod(nameof(Regex.ValidateMatchTimeout), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static)!;
    Call(validateTimeout);

    // base.internalMatchTimeout = timeSpan;
    Ldthis();
    il.Emit(OpCodes.Ldarg_1);
    FieldInfo timeoutField = RegexField(nameof(Regex.internalMatchTimeout));
    Stfld(timeoutField);

    Ret();
}
|
||
/// <summary>
/// Emits IL that stores a new Hashtable into <paramref name="field"/> on the current instance and then
/// adds one entry to it for every entry of <paramref name="ht"/>.
/// </summary>
/// <param name="field">Field that receives the new Hashtable.</param>
/// <param name="ht">
/// Entries to reproduce. A key that is an int is emitted as a boxed int constant, any other key is cast to
/// string and emitted as a string literal; every value is cast to int and emitted as a boxed int constant.
/// </param>
internal void GenerateCreateHashtable(FieldInfo field, Hashtable ht)
{
    // this.field = new Hashtable();
    Ldthis();
    ConstructorInfo hashtableCtor = typeof(Hashtable).GetConstructor(Type.EmptyTypes)!;
    _ilg!.Emit(OpCodes.Newobj, hashtableCtor);
    Stfld(field);

    MethodInfo add = typeof(Hashtable).GetMethod(nameof(Hashtable.Add), BindingFlags.Public | BindingFlags.Instance)!;

    foreach (DictionaryEntry entry in ht)
    {
        // this.field.Add(key, value);
        Ldthisfld(field);

        switch (entry.Key)
        {
            case int number:
                Ldc(number);
                _ilg!.Emit(OpCodes.Box, typeof(int));
                break;

            default:
                Ldstr((string)entry.Key);
                break;
        }

        Ldc((int)entry.Value!);
        _ilg!.Emit(OpCodes.Box, typeof(int));
        Callvirt(add);
    }
}
|
||
/// <summary>Looks up a field of the Regex type by name, considering public and non-public, instance and static fields.</summary>
/// <param name="fieldname">Name of the field to find.</param>
/// <returns>The matching field; the result is not checked, so it is null at run time when no such field exists.</returns>
private static FieldInfo RegexField(string fieldname)
{
    const BindingFlags AnyField = BindingFlags.Instance | BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic;
    FieldInfo? found = typeof(Regex).GetField(fieldname, AnyField);
    return found!;
}
|
||
/// <summary>Saves the assembly to the given file, appending ".dll" when the name does not already end with it.</summary>
/// <param name="fileName">Target file name; the ".dll" suffix check is ordinal and therefore case-sensitive.</param>
internal void Save(string fileName)
{
    string targetFile = fileName.EndsWith(".dll", StringComparison.Ordinal)
        ? fileName
        : fileName + ".dll";

    // TODO: Use public API when it's available: https://github.com/dotnet/runtime/issues/15704
    _save.Invoke(_assembly, [targetFile]);
}
|
||
/// <summary>Begins the definition of a new class in the given module, deriving from the specified base class.</summary>
/// <param name="moduleBuilder">Module the type is defined in.</param>
/// <param name="typeName">Name of the new type.</param>
/// <param name="isPublic">True to mark the type public, false to mark it not public.</param>
/// <param name="isSealed">True to additionally mark the type sealed.</param>
/// <param name="inheritFromClass">Base class of the new type.</param>
/// <returns>The builder for the newly defined type.</returns>
private static TypeBuilder DefineType(
    ModuleBuilder moduleBuilder,
    string typeName,
    bool isPublic,
    bool isSealed,
    [DynamicallyAccessedMembers(DynamicallyAccessedMemberTypes.All)] Type inheritFromClass)
{
    TypeAttributes visibility = isPublic ? TypeAttributes.Public : TypeAttributes.NotPublic;
    TypeAttributes sealing = isSealed ? TypeAttributes.Sealed : 0;

    TypeAttributes typeAttributes = TypeAttributes.Class | TypeAttributes.BeforeFieldInit | visibility | sealing;
    return moduleBuilder.DefineType(typeName, typeAttributes, inheritFromClass);
}
|
||
/// <summary>Begins the definition of a new protected virtual method with the given parameter types and return type.</summary>
/// <param name="typeBuilder">Type the method is defined on.</param>
/// <param name="methname">Name of the method.</param>
/// <param name="parameterTypes">Parameter types of the method, or null.</param>
/// <param name="returnType">Return type of the method, or null.</param>
/// <param name="builder">Receives the builder for the newly defined method.</param>
/// <returns>The IL generator for the new method's body.</returns>
private static ILGenerator DefineMethod(TypeBuilder typeBuilder, string methname, Type[]? parameterTypes, Type? returnType, out MethodBuilder builder)
{
    const MethodAttributes ProtectedVirtual = MethodAttributes.Family | MethodAttributes.Virtual;

    MethodBuilder defined = typeBuilder.DefineMethod(methname, ProtectedVirtual, returnType, parameterTypes);
    builder = defined;
    return defined.GetILGenerator();
}
} | ||
} | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.