6
6
using System . Runtime . CompilerServices ;
7
7
using System . Runtime . InteropServices ;
8
8
using System . Runtime . Intrinsics ;
9
+ using System . Text ;
9
10
10
11
namespace System . Buffers
11
12
{
@@ -61,7 +62,7 @@ public static bool StartsWith<TCaseSensitivity>(ref char matchStart, int lengthR
61
62
return false ;
62
63
}
63
64
64
- return TCaseSensitivity . Equals ( ref matchStart , candidate ) ;
65
+ return TCaseSensitivity . Equals < ValueLength8OrLongerOrUnknown > ( ref matchStart , candidate ) ;
65
66
}
66
67
67
68
[ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
@@ -79,13 +80,38 @@ private static bool ScalarEquals<TCaseSensitivity>(ref char matchStart, string c
79
80
return true ;
80
81
}
81
82
83
/// <summary>
/// Compile-time description of the candidate string's length, consumed as a generic
/// argument by <c>ICaseSensitivity.Equals</c> so that each implementation can branch
/// on constants instead of inspecting the length at run time.
/// </summary>
public interface IValueLength
{
    /// <summary><see langword="true"/> when the candidate is known to have at least 4 chars.</summary>
    static abstract bool AtLeast4Chars { get; }

    /// <summary>
    /// <see langword="true"/> when the candidate has at least 8 chars, or when its length
    /// is not known up front.
    /// </summary>
    static abstract bool AtLeast8CharsOrUnknown { get; }
}
88
+
89
/// <summary>
/// <see cref="IValueLength"/> marker reporting <see langword="false"/> for both length flags,
/// i.e. the candidate is shorter than 4 chars.
/// </summary>
public readonly struct ValueLengthLessThan4 : IValueLength
{
    /// <inheritdoc/>
    public static bool AtLeast4Chars
    {
        get { return false; }
    }

    /// <inheritdoc/>
    public static bool AtLeast8CharsOrUnknown
    {
        get { return false; }
    }
}
94
+
95
/// <summary>
/// <see cref="IValueLength"/> marker for candidates that have at least 4 chars but are
/// neither 8+ chars long nor of unknown length.
/// </summary>
public readonly struct ValueLength4To7 : IValueLength
{
    /// <inheritdoc/>
    public static bool AtLeast4Chars
    {
        get { return true; }
    }

    /// <inheritdoc/>
    public static bool AtLeast8CharsOrUnknown
    {
        get { return false; }
    }
}
100
+
101
+ // "Unknown" is currently only used by Teddy when confirming matches.
102
/// <summary>
/// <see cref="IValueLength"/> marker reporting <see langword="true"/> for both length flags:
/// the candidate has 8 or more chars, or its length is not known ahead of time.
/// </summary>
public readonly struct ValueLength8OrLongerOrUnknown : IValueLength
{
    /// <inheritdoc/>
    public static bool AtLeast4Chars
    {
        get { return true; }
    }

    /// <inheritdoc/>
    public static bool AtLeast8CharsOrUnknown
    {
        get { return true; }
    }
}
107
+
82
108
/// <summary>
/// Strategy describing how inputs are normalized before comparison and how a full
/// candidate match is confirmed. Implemented by stateless structs and used as a
/// generic argument so that calls resolve statically.
/// </summary>
public interface ICaseSensitivity
{
    /// <summary>Applies this mode's transformation to a single input character.</summary>
    static abstract char TransformInput(char input);

    /// <summary>Applies this mode's transformation to a 128-bit vector of input bytes.</summary>
    static abstract Vector128<byte> TransformInput(Vector128<byte> input);

    /// <summary>Applies this mode's transformation to a 256-bit vector of input bytes.</summary>
    static abstract Vector256<byte> TransformInput(Vector256<byte> input);

    /// <summary>Applies this mode's transformation to a 512-bit vector of input bytes.</summary>
    static abstract Vector512<byte> TransformInput(Vector512<byte> input);

    /// <summary>
    /// Checks whether the text starting at <paramref name="matchStart"/> equals
    /// <paramref name="candidate"/> under this case-sensitivity mode.
    /// </summary>
    /// <typeparam name="TValueLength">
    /// Compile-time hint about the length of <paramref name="candidate"/>.
    /// </typeparam>
    /// <param name="matchStart">Reference to the first char of the potential match.</param>
    /// <param name="candidate">The value to compare against.</param>
    /// <returns><see langword="true"/> if the input matches the candidate.</returns>
    static abstract bool Equals<TValueLength>(ref char matchStart, string candidate)
        where TValueLength : struct, IValueLength;
}
90
116
91
117
// Performs no case transformations.
@@ -104,8 +130,41 @@ public interface ICaseSensitivity
104
130
/// <summary>Returns <paramref name="input"/> unchanged; this mode applies no transformation.</summary>
public static Vector512<byte> TransformInput(Vector512<byte> input)
{
    return input;
}
105
131
106
132
/// <summary>
/// Case-sensitive check that the chars starting at <paramref name="matchStart"/> are
/// identical to <paramref name="candidate"/>.
/// </summary>
/// <typeparam name="TValueLength">
/// Compile-time length hint that selects the comparison strategy: a general
/// <c>SpanHelpers.SequenceEqual</c> call for 8+ chars or unknown length, two overlapping
/// 8-byte reads for 4-7 chars, or a single 4-byte read of the last two chars for 2-3 chars.
/// </typeparam>
/// <param name="matchStart">
/// Reference to the first char of the potential match. The caller must guarantee that at
/// least <c>candidate.Length</c> chars are readable from here. On the short-length paths
/// the first char must already have been verified against <c>candidate[0]</c>.
/// </param>
/// <param name="candidate">The value to compare against; must be longer than one char.</param>
/// <returns><see langword="true"/> if every char matches exactly.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Equals<TValueLength>(ref char matchStart, string candidate)
    where TValueLength : struct, IValueLength
{
    Debug.Assert(candidate.Length > 1);

    ref byte first = ref Unsafe.As<char, byte>(ref matchStart);
    ref byte second = ref Unsafe.As<char, byte>(ref candidate.GetRawStringData());
    nuint byteLength = (nuint)(uint)candidate.Length * 2;

    if (TValueLength.AtLeast8CharsOrUnknown)
    {
        return SpanHelpers.SequenceEqual(ref first, ref second, byteLength);
    }

    Debug.Assert(matchStart == candidate[0], "This should only be called after the first character has been checked");

    if (TValueLength.AtLeast4Chars)
    {
        // The offset below underflows for fewer than 4 chars, and the two 8-byte reads
        // would leave a gap for more than 8 chars. Guard the contract in debug builds,
        // mirroring the assert on the 2-3 char path.
        Debug.Assert(candidate.Length is >= 4 and <= 7);

        // Compare the first 4 chars and the last 4 chars; the two windows overlap
        // whenever the candidate is shorter than 8 chars.
        nuint offset = byteLength - sizeof(ulong);
        ulong differentBits = Unsafe.ReadUnaligned<ulong>(ref first) - Unsafe.ReadUnaligned<ulong>(ref second);
        differentBits |= Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref first, offset)) - Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref second, offset));
        return differentBits == 0;
    }
    else
    {
        Debug.Assert(candidate.Length is 2 or 3);

        // We know that the candidate is 2 or 3 characters long, and that the first character has already been checked.
        // We only have to check that the last 2 characters also match.
        nuint offset = byteLength - sizeof(uint);

        return Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref first, offset))
            == Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref second, offset));
    }
}
109
168
}
110
169
111
170
// Transforms inputs to their uppercase variants with the assumption that all input characters are ASCII letters.
@@ -125,8 +184,38 @@ public static bool Equals(ref char matchStart, string candidate) =>
125
184
/// <summary>
/// Clears bit 0x20 in every byte of <paramref name="input"/>, which maps ASCII lowercase
/// letters onto their uppercase counterparts.
/// </summary>
public static Vector512<byte> TransformInput(Vector512<byte> input)
{
    // 0xDF == (byte)~0x20
    const byte ClearCaseBit = 0xDF;
    return input & Vector512.Create(ClearCaseBit);
}
126
185
127
186
/// <summary>
/// Case-insensitive check that the chars starting at <paramref name="matchStart"/> match
/// <paramref name="candidate"/>, for candidates that are already uppercase.
/// </summary>
/// <typeparam name="TValueLength">
/// Compile-time length hint: 8+ chars or unknown length goes through
/// <c>Ascii.EqualsIgnoreCase</c>; 4-7 chars uses two overlapping 8-byte reads; 2-3 chars
/// uses two overlapping 4-byte reads.
/// </typeparam>
/// <param name="matchStart">
/// Reference to the first char of the potential match. The caller must guarantee that at
/// least <c>candidate.Length</c> chars are readable from here.
/// </param>
/// <param name="candidate">
/// The uppercase value to compare against; must be longer than one char.
/// </param>
/// <returns><see langword="true"/> if the input matches when ASCII letter case is ignored.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Equals<TValueLength>(ref char matchStart, string candidate)
    where TValueLength : struct, IValueLength
{
    Debug.Assert(candidate.Length > 1);
    Debug.Assert(candidate.ToUpperInvariant() == candidate);

    if (TValueLength.AtLeast8CharsOrUnknown)
    {
        return Ascii.EqualsIgnoreCase(ref matchStart, ref candidate.GetRawStringData(), (uint)candidate.Length);
    }

    ref byte first = ref Unsafe.As<char, byte>(ref matchStart);
    ref byte second = ref Unsafe.As<char, byte>(ref candidate.GetRawStringData());
    nuint byteLength = (nuint)(uint)candidate.Length * 2;

    if (TValueLength.AtLeast4Chars)
    {
        // The offset below underflows for fewer than 4 chars, and the two 8-byte reads
        // would not cover every char beyond 8. Guard the contract in debug builds.
        Debug.Assert(candidate.Length is >= 4 and <= 7);

        // Clears bit 0x20 of each of the four 16-bit chars in the input before comparing
        // against the (already uppercase) candidate.
        const ulong CaseMask = ~0x20002000200020u;
        nuint offset = byteLength - sizeof(ulong);
        ulong differentBits = (Unsafe.ReadUnaligned<ulong>(ref first) & CaseMask) - Unsafe.ReadUnaligned<ulong>(ref second);
        differentBits |= (Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref first, offset)) & CaseMask) - Unsafe.ReadUnaligned<ulong>(ref Unsafe.Add(ref second, offset));
        return differentBits == 0;
    }
    else
    {
        // Two 4-byte reads (first two chars and last two chars) only cover the whole
        // candidate when it is 2 or 3 chars long.
        Debug.Assert(candidate.Length is 2 or 3);

        // Same as above, for two 16-bit chars at a time.
        const uint CaseMask = ~0x200020u;
        nuint offset = byteLength - sizeof(uint);
        uint differentBits = (Unsafe.ReadUnaligned<uint>(ref first) & CaseMask) - Unsafe.ReadUnaligned<uint>(ref second);
        differentBits |= (Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref first, offset)) & CaseMask) - Unsafe.ReadUnaligned<uint>(ref Unsafe.Add(ref second, offset));
        return differentBits == 0;
    }
}
130
219
}
131
220
132
221
// Transforms inputs to their uppercase variants with the assumption that all input characters are ASCII.
@@ -170,8 +259,16 @@ public static Vector512<byte> TransformInput(Vector512<byte> input)
170
259
}
171
260
172
261
/// <summary>
/// ASCII case-insensitive check that the chars starting at <paramref name="matchStart"/>
/// match <paramref name="candidate"/>.
/// </summary>
/// <typeparam name="TValueLength">
/// Compile-time length hint: 8+ chars or unknown length is delegated to
/// <c>Ascii.EqualsIgnoreCase</c>, anything shorter to <c>ScalarEquals</c>.
/// </typeparam>
/// <param name="matchStart">Reference to the first char of the potential match.</param>
/// <param name="candidate">The value to compare against.</param>
/// <returns><see langword="true"/> if the input matches the candidate.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Equals<TValueLength>(ref char matchStart, string candidate)
    where TValueLength : struct, IValueLength
    => TValueLength.AtLeast8CharsOrUnknown
        ? Ascii.EqualsIgnoreCase(ref matchStart, ref candidate.GetRawStringData(), (uint)candidate.Length)
        : ScalarEquals<CaseInsensitiveAscii>(ref matchStart, candidate);
175
272
}
176
273
177
274
// We can't efficiently map non-ASCII inputs to their Ordinal uppercase variants,
@@ -184,8 +281,16 @@ public static bool Equals(ref char matchStart, string candidate) =>
184
281
/// <summary>
/// Not supported for this mode; always throws.
/// </summary>
/// <exception cref="UnreachableException">Always thrown.</exception>
public static Vector512<byte> TransformInput(Vector512<byte> input)
{
    throw new UnreachableException();
}
185
282
186
283
/// <summary>
/// Ordinal case-insensitive check that the chars starting at <paramref name="matchStart"/>
/// match <paramref name="candidate"/>.
/// </summary>
/// <typeparam name="TValueLength">
/// Compile-time length hint: 8+ chars or unknown length uses <c>Ordinal.EqualsIgnoreCase</c>,
/// anything shorter goes straight to <c>Ordinal.EqualsIgnoreCase_Scalar</c>.
/// </typeparam>
/// <param name="matchStart">Reference to the first char of the potential match.</param>
/// <param name="candidate">The value to compare against.</param>
/// <returns><see langword="true"/> if the input matches the candidate.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool Equals<TValueLength>(ref char matchStart, string candidate)
    where TValueLength : struct, IValueLength
{
    ref char candidateStart = ref candidate.GetRawStringData();
    int charCount = candidate.Length;

    if (!TValueLength.AtLeast8CharsOrUnknown)
    {
        // Length is known to be below 8 chars: call the scalar routine directly.
        return Ordinal.EqualsIgnoreCase_Scalar(ref matchStart, ref candidateStart, charCount);
    }

    return Ordinal.EqualsIgnoreCase(ref matchStart, ref candidateStart, charCount);
}
189
294
}
190
295
}
191
296
}
0 commit comments