diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs index cacb2211be5c7f..9ac34438a88cfb 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs @@ -175,8 +175,27 @@ static void AppendHashtableContents(IndentedTextWriter writer, IEnumerableEmits the code for the RunnerFactory. This is the actual logic for the regular expression. - private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers) + private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers, bool checkOverflow) { + void EnterCheckOverflow() + { + if (checkOverflow) + { + writer.WriteLine($"unchecked"); + writer.WriteLine($"{{"); + writer.Indent++; + } + } + + void ExitCheckOverflow() + { + if (checkOverflow) + { + writer.Indent--; + writer.WriteLine($"}}"); + } + } + writer.WriteLine($"/// Provides a factory for creating instances to be used by methods on ."); writer.WriteLine($"private sealed class RunnerFactory : RegexRunnerFactory"); writer.WriteLine($"{{"); @@ -211,7 +230,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, writer.WriteLine($" protected override void Scan(ReadOnlySpan inputSpan)"); writer.WriteLine($" {{"); writer.Indent += 3; + EnterCheckOverflow(); (bool needsTryFind, bool needsTryMatch) = EmitScan(writer, rm); + ExitCheckOverflow(); writer.Indent -= 3; writer.WriteLine($" }}"); if (needsTryFind) @@ -223,7 +244,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(ReadOnlySpan inputSpan)"); writer.WriteLine($" {{"); writer.Indent += 3; + EnterCheckOverflow(); EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers); + ExitCheckOverflow(); writer.Indent -= 3; writer.WriteLine($" }}"); } @@ -236,7 +259,9 @@ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, writer.WriteLine($" private bool TryMatchAtCurrentPosition(ReadOnlySpan inputSpan)"); writer.WriteLine($" {{"); writer.Indent += 3; - EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers); + EnterCheckOverflow(); + EmitTryMatchAtCurrentPosition(writer, rm, requiredHelpers, checkOverflow); + ExitCheckOverflow(); writer.Indent -= 3; writer.WriteLine($" }}"); } @@ -291,23 +316,24 @@ private static void AddIsWordCharHelper(Dictionary requiredHel } /// Adds the IsBoundary helper to the required helpers collection. - private static void AddIsBoundaryHelper(Dictionary requiredHelpers) + private static void AddIsBoundaryHelper(Dictionary requiredHelpers, bool checkOverflow) { const string IsBoundary = nameof(IsBoundary); if (!requiredHelpers.ContainsKey(IsBoundary)) { + string uncheckedKeyword = checkOverflow ? "unchecked" : ""; requiredHelpers.Add(IsBoundary, new string[] { - "/// Determines whether the specified index is a boundary.", - "[MethodImpl(MethodImplOptions.AggressiveInlining)]", - "internal static bool IsBoundary(ReadOnlySpan inputSpan, int index)", - "{", - " int indexMinus1 = index - 1;", - " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=", - " ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));", - "", - " static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');", - "}", + $"/// Determines whether the specified index is a boundary.", + $"[MethodImpl(MethodImplOptions.AggressiveInlining)]", + $"internal static bool IsBoundary(ReadOnlySpan inputSpan, int index)", + $"{{", + $" int indexMinus1 = index - 1;", + $" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=", + $" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));", + $"", + $" static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');", + $"}}", }); AddIsWordCharHelper(requiredHelpers); @@ -315,27 +341,27 @@ private static void AddIsBoundaryHelper(Dictionary requiredHel } /// Adds the IsECMABoundary helper to the required helpers collection. - private static void AddIsECMABoundaryHelper(Dictionary requiredHelpers) + private static void AddIsECMABoundaryHelper(Dictionary requiredHelpers, bool checkOverflow) { const string IsECMABoundary = nameof(IsECMABoundary); if (!requiredHelpers.ContainsKey(IsECMABoundary)) { + string uncheckedKeyword = checkOverflow ? "unchecked" : ""; requiredHelpers.Add(IsECMABoundary, new string[] { - "/// Determines whether the specified index is a boundary (ECMAScript).", - "[MethodImpl(MethodImplOptions.AggressiveInlining)]", - "internal static bool IsECMABoundary(ReadOnlySpan inputSpan, int index)", - "{", - " int indexMinus1 = index - 1;", - " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=", - " ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));", - "", - " static bool IsECMAWordChar(char ch) =>", - " ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter", - " (((uint)ch - '0') < 10) || // digit", - " ch == '_' || // underscore", - " ch == '\\u0130'; // latin capital letter I with dot above", - "}", + $"/// Determines whether the specified index is a boundary (ECMAScript).", + $"[MethodImpl(MethodImplOptions.AggressiveInlining)]", + $"internal static bool IsECMABoundary(ReadOnlySpan inputSpan, int index)", + $"{{", + $" int indexMinus1 = index - 1;", + $" return {uncheckedKeyword}((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=", + $" {uncheckedKeyword}((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));", + $"", + $" static bool IsECMAWordChar(char ch) =>", + $" char.IsAsciiLetterOrDigit(ch) ||", + $" ch == '_' ||", + $" ch == '\\u0130'; // latin capital letter I with dot above", + $"}}", }); } } @@ -429,7 +455,7 @@ FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start or /// Emits the body of the TryFindNextPossibleStartingPosition. private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers) { - RegexOptions options = (RegexOptions)rm.Options; + RegexOptions options = rm.Options; RegexTree regexTree = rm.Tree; bool rtl = (options & RegexOptions.RightToLeft) != 0; @@ -1025,7 +1051,7 @@ void EmitLiteralAfterAtomicLoop() } /// Emits the body of the TryMatchAtCurrentPosition. - private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers) + private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary requiredHelpers, bool checkOverflow) { // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled // version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via @@ -2668,14 +2694,14 @@ void EmitBoundary(RegexNode node) call = node.Kind is RegexNodeKind.Boundary ? $"!{HelpersTypeName}.IsBoundary" : $"{HelpersTypeName}.IsBoundary"; - AddIsBoundaryHelper(requiredHelpers); + AddIsBoundaryHelper(requiredHelpers, checkOverflow); } else { call = node.Kind is RegexNodeKind.ECMABoundary ? $"!{HelpersTypeName}.IsECMABoundary" : $"{HelpersTypeName}.IsECMABoundary"; - AddIsECMABoundaryHelper(requiredHelpers); + AddIsECMABoundaryHelper(requiredHelpers, checkOverflow); } using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))")) diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs index effe0127442616..da7d936bed5e79 100644 --- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs +++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs @@ -36,13 +36,24 @@ public partial class RegexGenerator : IIncrementalGenerator "#pragma warning disable CS0219 // Variable assigned but never used", }; + private record struct CompilationData(bool AllowUnsafe, bool CheckOverflow); + public void Initialize(IncrementalGeneratorInitializationContext context) { + // To avoid invalidating every regex's output when anything from the compilation changes, + // we extract from it the only things we care about. + IncrementalValueProvider compilationDataProvider = + context.CompilationProvider + .Select((x, _) => + x.Options is CSharpCompilationOptions options ? + new CompilationData(options.AllowUnsafe, options.CheckOverflow) : + default); + // Produces one entry per generated regex. This may be: // - Diagnostic in the case of a failure that should end the compilation // - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary requiredHelpers) in the case of valid regex // - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex - IncrementalValueProvider> codeOrDiagnostics = + IncrementalValueProvider> results = context.SyntaxProvider // Find all MethodDeclarationSyntax nodes attributed with GeneratedRegex and gather the required information. @@ -52,9 +63,13 @@ public void Initialize(IncrementalGeneratorInitializationContext context) GetSemanticTargetForGeneration) .Where(static m => m is not null) + // Incorporate the compilation data, as it impacts code generation. + .Combine(compilationDataProvider) + // Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs. - .Select((state, _) => + .Select((methodOrDiagnosticAndCompilationData, _) => { + object? state = methodOrDiagnosticAndCompilationData.Left; if (state is not RegexMethod regexMethod) { Debug.Assert(state is Diagnostic); @@ -74,26 +89,16 @@ public void Initialize(IncrementalGeneratorInitializationContext context) var writer = new IndentedTextWriter(sw); writer.Indent += 2; writer.WriteLine(); - EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers); + EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers, methodOrDiagnosticAndCompilationData.Right.CheckOverflow); writer.Indent -= 2; - return (regexMethod, sw.ToString(), requiredHelpers); + return (regexMethod, sw.ToString(), requiredHelpers, methodOrDiagnosticAndCompilationData.Right); }) .Collect(); - // To avoid invalidating every regex's output when anything from the compilation changes, - // we extract from it the only things we care about: whether unsafe code is allowed, - // and a name based on the assembly's name, and only that information is then fed into - // RegisterSourceOutput along with all of the cached generated data from each regex. - IncrementalValueProvider<(bool AllowUnsafe, string? AssemblyName)> compilationDataProvider = - context.CompilationProvider - .Select((x, _) => (x.Options is CSharpCompilationOptions { AllowUnsafe: true }, x.AssemblyName)); - // When there something to output, take all the generated strings and concatenate them to output, // and raise all of the created diagnostics. - context.RegisterSourceOutput(codeOrDiagnostics.Combine(compilationDataProvider), static (context, compilationDataAndResults) => + context.RegisterSourceOutput(results, static (context, results) => { - ImmutableArray results = compilationDataAndResults.Left; - // Report any top-level diagnostics. bool allFailures = true; foreach (object result in results) @@ -150,7 +155,7 @@ public void Initialize(IncrementalGeneratorInitializationContext context) context.ReportDiagnostic(limitedSupportResult.Item3); regexMethod = limitedSupportResult.Item1; } - else if (result is ValueTuple> regexImpl) + else if (result is ValueTuple, CompilationData> regexImpl) { foreach (KeyValuePair helper in regexImpl.Item3) { @@ -214,11 +219,11 @@ public void Initialize(IncrementalGeneratorInitializationContext context) writer.WriteLine(); } } - else if (result is ValueTuple> regexImpl) + else if (result is ValueTuple, CompilationData> regexImpl) { if (!regexImpl.Item1.IsDuplicate) { - EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, compilationDataAndResults.Right.AllowUnsafe); + EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item2, regexImpl.Item4.AllowUnsafe); writer.WriteLine(); } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs index 04e9683bdd2ccf..340ed7fbea5dfe 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs @@ -68,13 +68,13 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName) } private static async Task<(Compilation, GeneratorDriverRunResult)> RunGeneratorCore( - string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default) { var proj = new AdhocWorkspace() .AddSolution(SolutionInfo.Create(SolutionId.CreateNewId(), VersionStamp.Create())) .AddProject("RegexGeneratorTest", "RegexGeneratorTest.dll", "C#") .WithMetadataReferences(additionalRefs is not null ? References.Concat(additionalRefs) : References) - .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe) + .WithCompilationOptions(new CSharpCompilationOptions(OutputKind.DynamicallyLinkedLibrary, allowUnsafe: allowUnsafe, checkOverflow: checkOverflow) .WithNullableContextOptions(NullableContextOptions.Enable)) .WithParseOptions(new CSharpParseOptions(langVersion)) .AddDocument("RegexGenerator.g.cs", SourceText.From(code, Encoding.UTF8)).Project; @@ -91,9 +91,9 @@ internal static byte[] CreateAssemblyImage(string source, string assemblyName) } internal static async Task> RunGenerator( - string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + string code, bool compile = false, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default) { - (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken); + (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, checkOverflow, cancellationToken); if (!compile) { return generatorResults.Diagnostics; @@ -114,9 +114,9 @@ internal static async Task> RunGenerator( } internal static async Task GenerateSourceText( - string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, CancellationToken cancellationToken = default) + string code, LanguageVersion langVersion = LanguageVersion.Preview, MetadataReference[]? additionalRefs = null, bool allowUnsafe = false, bool checkOverflow = true, CancellationToken cancellationToken = default) { - (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, cancellationToken); + (Compilation comp, GeneratorDriverRunResult generatorResults) = await RunGeneratorCore(code, langVersion, additionalRefs, allowUnsafe, checkOverflow, cancellationToken); string generatedSource = string.Concat(generatorResults.GeneratedTrees.Select(t => t.ToString())); if (generatorResults.Diagnostics.Length != 0) @@ -197,6 +197,7 @@ internal static async Task SourceGenRegexAsync( .WithCompilationOptions( new CSharpCompilationOptions( OutputKind.DynamicallyLinkedLibrary, + checkOverflow: true, warningLevel: 9999, // docs recommend using "9999" to catch all warnings now and in the future specificDiagnosticOptions: ImmutableDictionary.Empty.Add("SYSLIB1044", ReportDiagnostic.Hidden)) // regex with limited support .WithNullableContextOptions(NullableContextOptions.Enable)) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs index 642bba87ad050e..fa86ca59e60999 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs @@ -6,6 +6,7 @@ using Microsoft.DotNet.RemoteExecutor; using System.Collections.Generic; using System.Diagnostics; +using System.Linq; using System.Globalization; using System.Threading.Tasks; using Xunit; @@ -438,22 +439,28 @@ partial class C ", compile: true)); } + public static IEnumerable Valid_ClassWithNamespace_ConfigurationOptions_MemberData() => + from pattern in new[] { "", "ab", "ab*c|de*?f|(ghi){1,3}", "\\\\w\\\\W\\\\b\\\\B\\\\d\\\\D\\\\s\\\\S" } + from options in new[] { RegexOptions.None, RegexOptions.IgnoreCase, RegexOptions.ECMAScript, RegexOptions.RightToLeft } + from allowUnsafe in new[] { false, true } + from checkOverflow in new[] { false, true } + select new object[] { pattern, options, allowUnsafe, checkOverflow }; + [Theory] - [InlineData(false)] - [InlineData(true)] - public async Task Valid_ClassWithNamespace(bool allowUnsafe) + [MemberData(nameof(Valid_ClassWithNamespace_ConfigurationOptions_MemberData))] + public async Task Valid_ClassWithNamespace_ConfigurationOptions(string pattern, RegexOptions options, bool allowUnsafe, bool checkOverflow) { - Assert.Empty(await RegexGeneratorHelper.RunGenerator(@" + Assert.Empty(await RegexGeneratorHelper.RunGenerator($@" using System.Text.RegularExpressions; namespace A - { + {{ partial class C - { - [GeneratedRegex(""ab"")] + {{ + [GeneratedRegex(""{pattern}"", RegexOptions.{options})] private static partial Regex Valid(); - } - } - ", compile: true, allowUnsafe: allowUnsafe)); + }} + }} + ", compile: true, allowUnsafe: allowUnsafe, checkOverflow: checkOverflow)); } [Fact]