From 45882464eaf15c3dd13c9c776c292abb9650dcf3 Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Wed, 1 Dec 2021 10:37:07 -0800 Subject: [PATCH 1/2] Use ReadOnlySpan on RegexCompiled engine. --- .../Text/RegularExpressions/RegexCompiler.cs | 65 ++++++++++--------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 58d5f5f1717abd..62e6eb29a7ce94 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -71,7 +71,6 @@ internal abstract class RegexCompiler private static readonly MethodInfo s_stringAsSpanMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string) })!; private static readonly MethodInfo s_stringAsSpanIntIntMethod = typeof(MemoryExtensions).GetMethod("AsSpan", new Type[] { typeof(string), typeof(int), typeof(int) })!; private static readonly MethodInfo s_stringGetCharsMethod = typeof(string).GetMethod("get_Chars", new Type[] { typeof(int) })!; - private static readonly MethodInfo s_stringIndexOfCharInt = typeof(string).GetMethod("IndexOf", new Type[] { typeof(char), typeof(int) })!; private static readonly MethodInfo s_stringLastIndexOfCharIntInt = typeof(string).GetMethod("LastIndexOf", new Type[] { typeof(char), typeof(int), typeof(int) })!; private static readonly MethodInfo s_textInfoToLowerMethod = typeof(TextInfo).GetMethod("ToLower", new Type[] { typeof(char) })!; private static readonly MethodInfo s_arrayResize = typeof(Array).GetMethod("Resize")!.MakeGenericMethod(typeof(int)); @@ -83,6 +82,7 @@ internal abstract class RegexCompiler private LocalBuilder? _runtextendLocal; private LocalBuilder? _runtextposLocal; private LocalBuilder? _runtextLocal; + private LocalBuilder? _runtextSpanLocal; private LocalBuilder? _runtrackposLocal; private LocalBuilder? _runtrackLocal; private LocalBuilder? _runstackposLocal; @@ -927,7 +927,7 @@ protected void GenerateFindFirstChar() { _runtextbegLocal = DeclareInt32(); } - _runtextLocal = DeclareString(); + _runtextSpanLocal = DeclareReadOnlySpanChar(); _textInfoLocal = null; if ((_options & RegexOptions.CultureInvariant) == 0) { @@ -954,8 +954,10 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or // Load necessary locals // int runtextpos = this.runtextpos; // int runtextend = this.runtextend; + // ReadOnlySpan runtextSpan = this.runtext; Mvfldloc(s_runtextposField, _runtextposLocal); Mvfldloc(s_runtextendField, _runtextendLocal); + Mvfldloc(s_runtextField, _runtextSpanLocal); if (_code.RightToLeft) { Mvfldloc(s_runtextbegField, _runtextbegLocal!); @@ -1139,9 +1141,9 @@ bool GenerateAnchors() Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Beq(l2); - Ldthisfld(s_runtextField); + Ldloc(_runtextSpanLocal); Ldloc(_runtextposLocal); - Call(s_stringGetCharsMethod); + Call(s_spanGetItemMethod); Ldc('\n'); Beq(l2); MarkLabel(l1); @@ -1191,25 +1193,26 @@ bool GenerateAnchors() Ldthisfld(s_runtextbegField); Ble(atBeginningOfLine); - // ... && runtext[runtextpos - 1] != '\n') { ... } - Ldthisfld(s_runtextField); + // ... && runtextSpan[runtextpos - 1] != '\n') { ... } + Ldloc(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldc(1); Sub(); - Call(s_stringGetCharsMethod); + Call(s_spanGetItemMethod); Ldc('\n'); Beq(atBeginningOfLine); - // int tmp = runtext.IndexOf('\n', runtextpos); - Ldthisfld(s_runtextField); - Ldc('\n'); + // int tmp = runtextSpan.Slice(runtextpos).IndexOf('\n'); + Ldloc(_runtextSpanLocal); Ldloc(_runtextposLocal); - Call(s_stringIndexOfCharInt); + Call(s_spanSliceIntMethod); + Ldc('\n'); + Call(s_spanIndexOfChar); using (RentedLocalBuilder newlinePos = RentInt32Local()) { Stloc(newlinePos); - // if (newlinePos == -1 || newlinePos + 1 > runtextend) + // if (newlinePos == -1 || newlinePos + runtextpos + 1 > runtextend) // { // runtextpos = runtextend; // return false; @@ -1218,13 +1221,17 @@ bool GenerateAnchors() Ldc(-1); Beq(returnFalse); Ldloc(newlinePos); + Ldloc(_runtextposLocal); + Add(); Ldc(1); Add(); Ldloc(_runtextendLocal); Bgt(returnFalse); - // runtextpos = newlinePos + 1; + // runtextpos = newlinePos + runtextpos + 1; Ldloc(newlinePos); + Ldloc(_runtextposLocal); + Add(); Ldc(1); Add(); Stloc(_runtextposLocal); @@ -1243,14 +1250,14 @@ void GenerateIndexOf_LeftToRight(string prefix) { using RentedLocalBuilder i = RentInt32Local(); - // int i = runtext.AsSpan(runtextpos, runtextend - runtextpos).IndexOf(prefix); + // int i = runtextSpan.Slice(runtextpos, runtextend - runtextpos).IndexOf(prefix); Ldthis(); - Ldfld(s_runtextField); + Ldloc(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Ldloc(_runtextposLocal); Sub(); - Call(s_stringAsSpanIntIntMethod); + Call(s_spanSliceIntIntMethod); Ldstr(prefix); Call(s_stringAsSpanMethod); Call(s_spanIndexOfSpan); @@ -1276,14 +1283,14 @@ void GenerateIndexOf_RightToLeft(string prefix) { using RentedLocalBuilder i = RentInt32Local(); - // int i = runtext.AsSpan(runtextpos, runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix); + // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix); Ldthis(); - Ldfld(s_runtextField); + Ldloc(_runtextSpanLocal); Ldloc(_runtextbegLocal!); Ldloc(_runtextposLocal); Ldloc(_runtextbegLocal!); Sub(); - Call(s_stringAsSpanIntIntMethod); + Call(s_spanSliceIntIntMethod); Ldstr(prefix); Call(s_stringAsSpanMethod); Call(s_spanLastIndexOfSpan); @@ -1316,14 +1323,14 @@ void GenerateFixedSet_RightToLeft() if (set.Chars is { Length: 1 } && !set.CaseInsensitive) { - // int i = runtext.AsSpan(runtextpos, runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]); + // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]); Ldthis(); - Ldfld(s_runtextField); + Ldloc(_runtextSpanLocal); Ldloc(_runtextbegLocal!); Ldloc(_runtextposLocal); Ldloc(_runtextbegLocal!); Sub(); - Call(s_stringAsSpanIntIntMethod); + Call(s_spanSliceIntIntMethod); Ldc(set.Chars[0]); Call(s_spanLastIndexOfChar); Stloc(i); @@ -1351,8 +1358,6 @@ void GenerateFixedSet_RightToLeft() Label increment = DefineLabel(); Label body = DefineLabel(); - Mvfldloc(s_runtextField, _runtextLocal); - // for (int i = runtextpos - 1; ... Ldloc(_runtextposLocal); Ldc(1); @@ -1360,11 +1365,11 @@ void GenerateFixedSet_RightToLeft() Stloc(i); BrFar(condition); - // if (MatchCharClass(runtext[i], set)) + // if (MatchCharClass(runtextSpan[i], set)) MarkLabel(body); - Ldloc(_runtextLocal); + Ldloc(_runtextSpanLocal); Ldloc(i); - Call(s_stringGetCharsMethod); + Call(s_spanGetItemMethod); EmitMatchCharacterClass(set.Set, set.CaseInsensitive); Brfalse(increment); @@ -1405,13 +1410,13 @@ void GenerateFixedSet_LeftToRight() using RentedLocalBuilder iLocal = RentInt32Local(); using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal(); - // ReadOnlySpan span = this.runtext.AsSpan(runtextpos, runtextend - runtextpos); - Ldthisfld(s_runtextField); + // ReadOnlySpan span = runtextSpan.Slice(runtextpos, runtextend - runtextpos); + Ldloc(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Ldloc(_runtextposLocal); Sub(); - Call(s_stringAsSpanIntIntMethod); + Call(s_spanSliceIntIntMethod); Stloc(textSpanLocal); // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix. From 91e742e4c23a5c7b0fd9af3fe7f295af38e76c92 Mon Sep 17 00:00:00 2001 From: Jose Perez Rodriguez Date: Wed, 1 Dec 2021 16:37:28 -0800 Subject: [PATCH 2/2] Fixing unit tests by correcting some bad IL. --- .../Text/RegularExpressions/RegexCompiler.cs | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 62e6eb29a7ce94..8d2c6ee9b4a924 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -954,10 +954,12 @@ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or // Load necessary locals // int runtextpos = this.runtextpos; // int runtextend = this.runtextend; - // ReadOnlySpan runtextSpan = this.runtext; + // ReadOnlySpan runtextSpan = this.runtext.AsSpan(); Mvfldloc(s_runtextposField, _runtextposLocal); Mvfldloc(s_runtextendField, _runtextendLocal); - Mvfldloc(s_runtextField, _runtextSpanLocal); + Ldthisfld(s_runtextField); + Call(s_stringAsSpanMethod); + Stloc(_runtextSpanLocal); if (_code.RightToLeft) { Mvfldloc(s_runtextbegField, _runtextbegLocal!); @@ -1141,9 +1143,10 @@ bool GenerateAnchors() Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Beq(l2); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextposLocal); Call(s_spanGetItemMethod); + LdindU2(); Ldc('\n'); Beq(l2); MarkLabel(l1); @@ -1194,16 +1197,17 @@ bool GenerateAnchors() Ble(atBeginningOfLine); // ... && runtextSpan[runtextpos - 1] != '\n') { ... } - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldc(1); Sub(); Call(s_spanGetItemMethod); + LdindU2(); Ldc('\n'); Beq(atBeginningOfLine); // int tmp = runtextSpan.Slice(runtextpos).IndexOf('\n'); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextposLocal); Call(s_spanSliceIntMethod); Ldc('\n'); @@ -1251,8 +1255,7 @@ void GenerateIndexOf_LeftToRight(string prefix) using RentedLocalBuilder i = RentInt32Local(); // int i = runtextSpan.Slice(runtextpos, runtextend - runtextpos).IndexOf(prefix); - Ldthis(); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Ldloc(_runtextposLocal); @@ -1284,8 +1287,7 @@ void GenerateIndexOf_RightToLeft(string prefix) using RentedLocalBuilder i = RentInt32Local(); // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix); - Ldthis(); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextbegLocal!); Ldloc(_runtextposLocal); Ldloc(_runtextbegLocal!); @@ -1324,8 +1326,7 @@ void GenerateFixedSet_RightToLeft() if (set.Chars is { Length: 1 } && !set.CaseInsensitive) { // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]); - Ldthis(); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextbegLocal!); Ldloc(_runtextposLocal); Ldloc(_runtextbegLocal!); @@ -1367,9 +1368,10 @@ void GenerateFixedSet_RightToLeft() // if (MatchCharClass(runtextSpan[i], set)) MarkLabel(body); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(i); Call(s_spanGetItemMethod); + LdindU2(); EmitMatchCharacterClass(set.Set, set.CaseInsensitive); Brfalse(increment); @@ -1411,7 +1413,7 @@ void GenerateFixedSet_LeftToRight() using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal(); // ReadOnlySpan span = runtextSpan.Slice(runtextpos, runtextend - runtextpos); - Ldloc(_runtextSpanLocal); + Ldloca(_runtextSpanLocal); Ldloc(_runtextposLocal); Ldloc(_runtextendLocal); Ldloc(_runtextposLocal);