diff --git a/Documentation/Evolution/StringProcessingAlgorithms.md b/Documentation/Evolution/StringProcessingAlgorithms.md index 8680ff75a..74416ae63 100644 --- a/Documentation/Evolution/StringProcessingAlgorithms.md +++ b/Documentation/Evolution/StringProcessingAlgorithms.md @@ -162,10 +162,11 @@ We also propose the following regex-powered algorithms as well as their generic |`replace(:with:subrange:maxReplacements)`| Replaces all occurrences of the sequence matching the given `RegexComponent` or sequence with a given collection | |`split(by:)`| Returns the longest possible subsequences of the collection around elements equal to the given separator | |`firstMatch(of:)`| Returns the first match of the specified `RegexComponent` within the collection | +|`wholeMatch(of:)`| Matches the specified `RegexComponent` in the collection as a whole | +|`prefixMatch(of:)`| Matches the specified `RegexComponent` against the collection at the beginning | |`matches(of:)`| Returns a collection containing all matches of the specified `RegexComponent` | - ## Detailed design ### `CustomMatchingRegexComponent` @@ -389,7 +390,7 @@ extension BidirectionalCollection where SubSequence == Substring { } ``` -#### First match +#### Match ```swift extension BidirectionalCollection where SubSequence == Substring { @@ -398,6 +399,16 @@ extension BidirectionalCollection where SubSequence == Substring { /// - Returns: The first match of `regex` in the collection, or `nil` if /// there isn't a match. public func firstMatch(of regex: R) -> RegexMatch? + + /// Match a regex against the collection in its entirety. + /// - Parameter r: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. + public func wholeMatch(of r: R) -> Regex.Match? + + /// Match a regex against the beginning of the collection. + /// - Parameter r: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. + public func prefixMatch(of r: R) -> Regex.Match? 
} ``` @@ -473,7 +484,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { /// - Returns: A new collection in which all occurrences of subsequence /// matching `regex` in `subrange` are replaced by `replacement`. public func replacing( - _ regex: R, + _ r: R, with replacement: Replacement, subrange: Range, maxReplacements: Int = .max @@ -489,7 +500,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { /// - Returns: A new collection in which all occurrences of subsequence /// matching `regex` are replaced by `replacement`. public func replacing( - _ regex: R, + _ r: R, with replacement: Replacement, maxReplacements: Int = .max ) -> Self where Replacement.Element == Element @@ -502,7 +513,7 @@ extension RangeReplaceableCollection where SubSequence == Substring { /// - maxReplacements: A number specifying how many occurrences of the /// sequence matching `regex` to replace. Default is `Int.max`. public mutating func replace( - _ regex: R, + _ r: R, with replacement: Replacement, maxReplacements: Int = .max ) where Replacement.Element == Element @@ -609,4 +620,4 @@ Trimming a string from both sides shares a similar story. For example, `"ababa". ### Future API -Some Python functions are not currently included in this proposal, such as trimming the suffix from a string/collection. This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancement can to be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages. +Some common string processing functions are not currently included in this proposal, such as trimming the suffix from a string/collection, and finding overlapping ranges of matched substrings. 
This pitch aims to establish a pattern for using `RegexComponent` with string processing algorithms, so that further enhancement can be introduced to the standard library easily in the future, and eventually close the gap between Swift and other popular scripting languages. diff --git a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift index cb527f948..4342391af 100644 --- a/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift +++ b/Sources/_StringProcessing/Algorithms/Matching/FirstMatch.swift @@ -39,6 +39,7 @@ extension BidirectionalCollection { extension BidirectionalCollection where SubSequence == Substring { @available(SwiftStdlib 5.7, *) + @_disfavoredOverload func firstMatch( of regex: R ) -> _MatchResult>? { diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 45b177867..e38af43f8 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -159,32 +159,23 @@ extension Regex { } @available(SwiftStdlib 5.7, *) -extension String { +extension BidirectionalCollection where SubSequence == Substring { + /// Match a regex against the collection in its entirety. + /// - Parameter r: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. public func wholeMatch( of r: R ) -> Regex.Match? { - try? r.regex.wholeMatch(in: self) + try? r.regex.wholeMatch(in: self[...]) } + /// Match a regex against the beginning of the collection. + /// - Parameter r: The regex to match against. + /// - Returns: The match if there is one, or `nil` if none. public func prefixMatch( of r: R ) -> Regex.Match? { - try? r.regex.prefixMatch(in: self) - } -} - -@available(SwiftStdlib 5.7, *) -extension Substring { - public func wholeMatch( - of r: R - ) -> Regex.Match? { - try? r.regex.wholeMatch(in: self) - } - - public func prefixMatch( - of r: R - ) -> Regex.Match? { - try? 
r.regex.prefixMatch(in: self) + try? r.regex.prefixMatch(in: self[...]) } } diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index bf4489a68..d17c3a142 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -133,6 +133,51 @@ func customTest( } } +// Test support: `Concat` wraps a `String` built from a name plus an optional integer suffix and forwards its collection requirements to that string, using `Substring` as its `SubSequence`. +struct Concat : Equatable { + var wrapped: String + init(_ name: String, _ suffix: Int?) { + if let suffix = suffix { + wrapped = name + String(suffix) + } else { + wrapped = name + } + } +} + +extension Concat : Collection { + typealias Index = String.Index + typealias Element = String.Element + + var startIndex: Index { return wrapped.startIndex } + var endIndex: Index { return wrapped.endIndex } + + subscript(position: Index) -> Element { + return wrapped[position] + } + + func index(after i: Index) -> Index { + return wrapped.index(after: i) + } +} + +extension Concat: BidirectionalCollection { + typealias Indices = String.Indices + typealias SubSequence = String.SubSequence + + func index(before i: Index) -> Index { + return wrapped.index(before: i) + } + + var indices: Indices { + wrapped.indices + } + + subscript(bounds: Range) -> Substring { + Substring(wrapped[bounds]) + } +} + class CustomRegexComponentTests: XCTestCase { // TODO: Refactor below into more exhaustive, declarative // tests. 
@@ -467,4 +512,120 @@ class CustomRegexComponentTests: XCTestCase { ) } + + + func testMatchVarients() { + func customTest( + _ regex: Regex, + _ input: Concat, + expected: (wholeMatch: Match?, firstMatch: Match?, prefixMatch: Match?), + file: StaticString = #file, line: UInt = #line + ) { + let wholeResult = input.wholeMatch(of: regex)?.output + let firstResult = input.firstMatch(of: regex)?.output + let prefixResult = input.prefixMatch(of: regex)?.output + XCTAssertEqual(wholeResult, expected.wholeMatch, file: file, line: line) + XCTAssertEqual(firstResult, expected.firstMatch, file: file, line: line) + XCTAssertEqual(prefixResult, expected.prefixMatch, file: file, line: line) + } + + typealias CaptureMatch1 = (Substring, Int?) + func customTest( + _ regex: Regex, + _ input: Concat, + expected: (wholeMatch: CaptureMatch1?, firstMatch: CaptureMatch1?, prefixMatch: CaptureMatch1?), + file: StaticString = #file, line: UInt = #line + ) { + let wholeResult = input.wholeMatch(of: regex)?.output + let firstResult = input.firstMatch(of: regex)?.output + let prefixResult = input.prefixMatch(of: regex)?.output + XCTAssertEqual(wholeResult?.0, expected.wholeMatch?.0, file: file, line: line) + XCTAssertEqual(wholeResult?.1, expected.wholeMatch?.1, file: file, line: line) + + XCTAssertEqual(firstResult?.0, expected.firstMatch?.0, file: file, line: line) + XCTAssertEqual(firstResult?.1, expected.firstMatch?.1, file: file, line: line) + + XCTAssertEqual(prefixResult?.0, expected.prefixMatch?.0, file: file, line: line) + XCTAssertEqual(prefixResult?.1, expected.prefixMatch?.1, file: file, line: line) + } + + var regex = Regex { + OneOrMore(.digit) + } + + customTest(regex, Concat("amy", 2023), expected:(nil, "2023", nil)) // amy2023 + customTest(regex, Concat("amy2023", nil), expected:(nil, "2023", nil)) + customTest(regex, Concat("amy", nil), expected:(nil, nil, nil)) + customTest(regex, Concat("", 2023), expected:("2023", "2023", "2023")) // 2023 + customTest(regex, 
Concat("bob012b", 2023), expected:(nil, "012", nil)) // bob012b2023 + customTest(regex, Concat("bob012b", nil), expected:(nil, "012", nil)) + customTest(regex, Concat("007bob", 2023), expected:(nil, "007", "007")) + customTest(regex, Concat("", nil), expected:(nil, nil, nil)) + + regex = Regex { + OneOrMore(CharacterClass("a"..."z")) + } + + customTest(regex, Concat("amy", 2023), expected:(nil, "amy", "amy")) // amy2023 + customTest(regex, Concat("amy", nil), expected:("amy", "amy", "amy")) + customTest(regex, Concat("amy2022-bob", 2023), expected:(nil, "amy", "amy")) // amy2022-bob2023 + customTest(regex, Concat("", 2023), expected:(nil, nil, nil)) // 2023 + customTest(regex, Concat("bob012b", 2023), expected:(nil, "bob", "bob")) // bob012b2023 + customTest(regex, Concat("bob012b", nil), expected:(nil, "bob", "bob")) + customTest(regex, Concat("007bob", 2023), expected:(nil, "bob", nil)) + customTest(regex, Concat("", nil), expected:(nil, nil, nil)) + + regex = Regex { + OneOrMore { + CharacterClass("A"..."Z") + OneOrMore(CharacterClass("a"..."z")) + Repeat(.digit, count: 2) + } + } + + customTest(regex, Concat("Amy12345", nil), expected:(nil, "Amy12", "Amy12")) + customTest(regex, Concat("Amy", 2023), expected:(nil, "Amy20", "Amy20")) + customTest(regex, Concat("Amy", 23), expected:("Amy23", "Amy23", "Amy23")) + customTest(regex, Concat("", 2023), expected:(nil, nil, nil)) // 2023 + customTest(regex, Concat("Amy23 Boba17", nil), expected:(nil, "Amy23", "Amy23")) + customTest(regex, Concat("amy23 Boba17", nil), expected:(nil, "Boba17", nil)) + customTest(regex, Concat("Amy23 boba17", nil), expected:(nil, "Amy23", "Amy23")) + customTest(regex, Concat("amy23 Boba", 17), expected:(nil, "Boba17", nil)) + customTest(regex, Concat("Amy23Boba17", nil), expected:("Amy23Boba17", "Amy23Boba17", "Amy23Boba17")) + customTest(regex, Concat("Amy23Boba", 17), expected:("Amy23Boba17", "Amy23Boba17", "Amy23Boba17")) + customTest(regex, Concat("23 Boba", 17), expected:(nil, "Boba17", nil)) + + 
let twoDigitRegex = Regex { + OneOrMore { + CharacterClass("A"..."Z") + OneOrMore(CharacterClass("a"..."z")) + Capture(Repeat(.digit, count: 2)) { Int($0) } + } + } + + customTest(twoDigitRegex, Concat("Amy12345", nil), expected: (nil, ("Amy12", 12), ("Amy12", 12))) + customTest(twoDigitRegex, Concat("Amy", 12345), expected: (nil, ("Amy12", 12), ("Amy12", 12))) + customTest(twoDigitRegex, Concat("Amy", 12), expected: (("Amy12", 12), ("Amy12", 12), ("Amy12", 12))) + customTest(twoDigitRegex, Concat("Amy23 Boba", 17), expected: (nil, firstMatch: ("Amy23", 23), prefixMatch: ("Amy23", 23))) + customTest(twoDigitRegex, Concat("amy23 Boba20", 23), expected:(nil, ("Boba20", 20), nil)) + customTest(twoDigitRegex, Concat("Amy23Boba17", nil), expected:(("Amy23Boba17", 17), ("Amy23Boba17", 17), ("Amy23Boba17", 17))) + customTest(twoDigitRegex, Concat("Amy23Boba", 17), expected:(("Amy23Boba17", 17), ("Amy23Boba17", 17), ("Amy23Boba17", 17))) + + let millennium = Regex { + CharacterClass("A"..."Z") + OneOrMore(CharacterClass("a"..."z")) + Capture { Repeat(.digit, count: 4) } transform: { v -> Int? in + guard let year = Int(v) else { return nil } + return year > 2000 ? year : nil + } + } + + customTest(millennium, Concat("Amy2025", nil), expected: (("Amy2025", 2025), ("Amy2025", 2025), ("Amy2025", 2025))) + customTest(millennium, Concat("Amy", 2025), expected: (("Amy2025", 2025), ("Amy2025", 2025), ("Amy2025", 2025))) + customTest(millennium, Concat("Amy1995", nil), expected: (("Amy1995", nil), ("Amy1995", nil), ("Amy1995", nil))) + customTest(millennium, Concat("Amy", 1995), expected: (("Amy1995", nil), ("Amy1995", nil), ("Amy1995", nil))) + customTest(millennium, Concat("amy2025", nil), expected: (nil, nil, nil)) + customTest(millennium, Concat("amy", 2025), expected: (nil, nil, nil)) + } } +