@@ -9,114 +9,26 @@ extension Character {
9
9
}
10
10
11
11
extension Processor {
12
/// Tries to match the built-in character class `cc` at `currentPosition`.
///
/// The actual test is delegated to `input._matchBuiltinCC`. When it yields an
/// index, `currentPosition` is moved there and `true` is returned; when it
/// yields `nil`, `signalFailure()` is invoked and the result is `false`.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - isInverted: Forwarded unchanged to `_matchBuiltinCC`.
///   - isStrictASCII: Forwarded unchanged to `_matchBuiltinCC`.
///   - isScalarSemantics: Forwarded unchanged to `_matchBuiltinCC`.
/// - Returns: `true` if the class matched and the position was advanced.
mutating func matchBuiltinCC(
  _ cc: _CharacterClassModel.Representation,
  isInverted: Bool,
  isStrictASCII: Bool,
  isScalarSemantics: Bool
) -> Bool {
  let advanced = input._matchBuiltinCC(
    cc,
    at: currentPosition,
    isInverted: isInverted,
    isStrictASCII: isStrictASCII,
    isScalarSemantics: isScalarSemantics)

  if let advanced = advanced {
    currentPosition = advanced
    return true
  }

  signalFailure()
  return false
}
30
-
31
/// Tests the built-in character class `cc` against the input at
/// `currentPosition` without mutating the processor.
///
/// - Parameters:
///   - cc: The built-in character class to test.
///   - isInverted: When `true`, the membership result is negated.
///   - isStrictASCII: When `true`, classes other than `.any`, `.anyGrapheme`
///     and `.anyScalar` additionally require the loaded element to be ASCII.
///   - isScalarSemantics: When `true`, membership is decided on the loaded
///     Unicode scalar; otherwise on the loaded character.
/// - Returns: The index just past the matched element, or `nil` when nothing
///   could be loaded or the (possibly inverted) test failed.
func _doMatchBuiltin(
  _ cc: _CharacterClassModel.Representation,
  _ isInverted: Bool,
  _ isStrictASCII: Bool,
  _ isScalarSemantics: Bool
) -> Input.Index? {
  guard let char = load() else { return nil }
  guard let scalar = loadScalar() else { return nil }

  // Passes when strict-ASCII mode is off, when the scalar is ASCII under
  // scalar semantics, or when the whole character is ASCII.
  let asciiAllowed = !isStrictASCII
    || (scalar.isASCII && isScalarSemantics)
    || char.isASCII

  // `.anyGrapheme` and `.anyScalar` dictate their own step size; every other
  // class steps according to the semantic level.
  let stepsByScalar: Bool
  switch cc {
  case .anyGrapheme:
    stepsByScalar = false
  case .anyScalar:
    stepsByScalar = true
  case .any, .digit, .horizontalWhitespace, .verticalWhitespace,
       .newlineSequence, .whitespace, .word:
    stepsByScalar = isScalarSemantics
  }

  var end: Input.Index = stepsByScalar
    ? input.unicodeScalars.index(after: currentPosition)
    : input.index(after: currentPosition)

  let isMember: Bool
  switch cc {
  case .any, .anyGrapheme:
    isMember = true

  case .anyScalar:
    // Under grapheme semantics a single-scalar step only counts when it
    // lands on a grapheme cluster boundary.
    isMember = isScalarSemantics || input.isOnGraphemeClusterBoundary(end)

  case .digit:
    isMember = asciiAllowed
      && (isScalarSemantics
            ? (scalar.properties.numericType != nil)
            : char.isNumber)

  case .horizontalWhitespace:
    isMember = asciiAllowed
      && (isScalarSemantics
            ? scalar.isHorizontalWhitespace
            : char._isHorizontalWhitespace)

  case .verticalWhitespace:
    isMember = asciiAllowed
      && (isScalarSemantics ? scalar.isNewline : char._isNewline)

  case .newlineSequence:
    if isScalarSemantics {
      isMember = asciiAllowed && scalar.isNewline
      // Match a full CR-LF sequence even in scalar semantics
      if isMember, scalar == "\r",
         end != input.endIndex, input.unicodeScalars[end] == "\n" {
        input.unicodeScalars.formIndex(after: &end)
      }
    } else {
      isMember = asciiAllowed && char._isNewline
    }

  case .whitespace:
    isMember = asciiAllowed
      && (isScalarSemantics
            ? scalar.properties.isWhitespace
            : char.isWhitespace)

  case .word:
    isMember = asciiAllowed
      && (isScalarSemantics
            ? scalar.properties.isAlphabetic
            : char.isWordCharacter)
  }

  // Inversion flips the outcome: succeed exactly when membership and the
  // inversion flag disagree.
  return isMember != isInverted ? end : nil
}
119
-
120
32
func isAtStartOfLine( _ payload: AssertionPayload ) -> Bool {
121
33
if currentPosition == subjectBounds. lowerBound { return true }
122
34
switch payload. semanticLevel {
@@ -126,7 +38,7 @@ extension Processor {
126
38
return input. unicodeScalars [ input. unicodeScalars. index ( before: currentPosition) ] . isNewline
127
39
}
128
40
}
129
-
41
+
130
42
func isAtEndOfLine( _ payload: AssertionPayload ) -> Bool {
131
43
if currentPosition == subjectBounds. upperBound { return true }
132
44
switch payload. semanticLevel {
@@ -169,7 +81,7 @@ extension Processor {
169
81
return isAtStartOfLine ( payload)
170
82
case . endOfLine:
171
83
return isAtEndOfLine ( payload)
172
-
84
+
173
85
case . caretAnchor:
174
86
if payload. anchorsMatchNewlines {
175
87
return isAtStartOfLine ( payload)
@@ -202,3 +114,152 @@ extension Processor {
202
114
}
203
115
}
204
116
}
117
+
118
+ // MARK: Built-in character class matching
119
+
120
extension String {

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Tests the built-in character class `cc` at `currentPosition`.
  ///
  /// The quick path is consulted first. If it gives a definite answer, that
  /// answer is returned (an `assert` cross-checks it against the thorough
  /// path); otherwise the thorough path decides.
  ///
  /// - Parameters:
  ///   - cc: The built-in character class to test.
  ///   - currentPosition: Where to test; positions at or past `endIndex`
  ///     never match.
  ///   - isInverted: When `true`, the membership result is negated.
  ///   - isStrictASCII: Forwarded to both paths.
  ///   - isScalarSemantics: Forwarded to both paths.
  /// - Returns: The index just past the match, or `nil` on failure.
  func _matchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> String.Index? {
    if currentPosition >= endIndex {
      return nil
    }

    let quick = _quickMatchBuiltinCC(
      cc,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics)

    guard case .definite(let fastAnswer) = quick else {
      // The quick path could not decide; do the full check.
      return _thoroughMatchBuiltinCC(
        cc,
        at: currentPosition,
        isInverted: isInverted,
        isStrictASCII: isStrictASCII,
        isScalarSemantics: isScalarSemantics)
    }

    // Both paths must agree whenever the quick path commits to an answer.
    assert(fastAnswer == _thoroughMatchBuiltinCC(
      cc,
      at: currentPosition,
      isInverted: isInverted,
      isStrictASCII: isStrictASCII,
      isScalarSemantics: isScalarSemantics))
    return fastAnswer
  }

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Quick path: wraps `_quickMatch` and applies inversion to its result.
  ///
  /// - Returns: `.unknown` when `_quickMatch` returns `nil`; otherwise
  ///   `.definite` carrying the next index on success or `nil` on failure.
  ///   `isStrictASCII` is accepted but not consulted here.
  @inline(__always)
  func _quickMatchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> QuickResult<String.Index?> {
    assert(currentPosition < endIndex)

    if let (end, isMember) = _quickMatch(
      cc, at: currentPosition, isScalarSemantics: isScalarSemantics
    ) {
      // Succeed exactly when membership and the inversion flag disagree.
      let accepted = isMember != isInverted
      return .definite(accepted ? end : nil)
    }
    return .unknown
  }

  // Mentioned in ProgrammersManual.md, update docs if redesigned
  /// Thorough path: decides membership from the character and the Unicode
  /// scalar found at `currentPosition`.
  ///
  /// - Parameters:
  ///   - cc: The built-in character class to test.
  ///   - currentPosition: Must be before `endIndex` (asserted).
  ///   - isInverted: When `true`, the membership result is negated.
  ///   - isStrictASCII: When `true`, classes other than `.any`,
  ///     `.anyGrapheme` and `.anyScalar` additionally require the element to
  ///     be ASCII.
  ///   - isScalarSemantics: When `true`, membership is decided on the scalar;
  ///     otherwise on the character.
  /// - Returns: The index just past the match, or `nil` on failure.
  @inline(never)
  func _thoroughMatchBuiltinCC(
    _ cc: _CharacterClassModel.Representation,
    at currentPosition: String.Index,
    isInverted: Bool,
    isStrictASCII: Bool,
    isScalarSemantics: Bool
  ) -> String.Index? {
    assert(currentPosition < endIndex)
    let char = self[currentPosition]
    let scalar = unicodeScalars[currentPosition]

    // Passes when strict-ASCII mode is off, when the scalar is ASCII under
    // scalar semantics, or when the whole character is ASCII.
    let asciiAllowed = !isStrictASCII
      || (scalar.isASCII && isScalarSemantics)
      || char.isASCII

    // `.anyGrapheme` and `.anyScalar` dictate their own step size; every
    // other class steps according to the semantic level.
    let stepsByScalar: Bool
    switch cc {
    case .anyGrapheme:
      stepsByScalar = false
    case .anyScalar:
      stepsByScalar = true
    case .any, .digit, .horizontalWhitespace, .verticalWhitespace,
         .newlineSequence, .whitespace, .word:
      stepsByScalar = isScalarSemantics
    }

    var end: String.Index = stepsByScalar
      ? unicodeScalars.index(after: currentPosition)
      : index(after: currentPosition)

    let isMember: Bool
    switch cc {
    case .any, .anyGrapheme:
      isMember = true

    case .anyScalar:
      // Under grapheme semantics a single-scalar step only counts when it
      // lands on a grapheme cluster boundary.
      isMember = isScalarSemantics || isOnGraphemeClusterBoundary(end)

    case .digit:
      isMember = asciiAllowed
        && (isScalarSemantics
              ? (scalar.properties.numericType != nil)
              : char.isNumber)

    case .horizontalWhitespace:
      isMember = asciiAllowed
        && (isScalarSemantics
              ? scalar.isHorizontalWhitespace
              : char._isHorizontalWhitespace)

    case .verticalWhitespace:
      isMember = asciiAllowed
        && (isScalarSemantics ? scalar.isNewline : char._isNewline)

    case .newlineSequence:
      if isScalarSemantics {
        isMember = asciiAllowed && scalar.isNewline
        // Match a full CR-LF sequence even in scalar semantics
        if isMember, scalar == "\r",
           end != endIndex, unicodeScalars[end] == "\n" {
          unicodeScalars.formIndex(after: &end)
        }
      } else {
        isMember = asciiAllowed && char._isNewline
      }

    case .whitespace:
      isMember = asciiAllowed
        && (isScalarSemantics
              ? scalar.properties.isWhitespace
              : char.isWhitespace)

    case .word:
      isMember = asciiAllowed
        && (isScalarSemantics
              ? scalar.properties.isAlphabetic
              : char.isWordCharacter)
    }

    // Inversion flips the outcome: succeed exactly when membership and the
    // inversion flag disagree.
    return isMember != isInverted ? end : nil
  }
}
0 commit comments