From d7a47cb1db12f8769964863dc739a92847c2a9df Mon Sep 17 00:00:00 2001 From: Alejandro Alonso Date: Sat, 28 May 2022 12:21:33 -0700 Subject: [PATCH] Implement Regex.mapOutput fix no capture --- Sources/_StringProcessing/Capture.swift | 5 ++ Sources/_StringProcessing/Executor.swift | 1 - Sources/_StringProcessing/Regex/Core.swift | 4 +- .../_StringProcessing/Regex/MapOutput.swift | 30 +++++++++++ Sources/_StringProcessing/Regex/Match.swift | 19 ++++--- Tests/RegexTests/MapOutputTests.swift | 51 +++++++++++++++++++ 6 files changed, 102 insertions(+), 8 deletions(-) create mode 100644 Sources/_StringProcessing/Regex/MapOutput.swift create mode 100644 Tests/RegexTests/MapOutputTests.swift diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index fe00bdc0f..0939adb8a 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -72,6 +72,11 @@ extension Sequence where Element == AnyRegexOutput.Element { caps.append(contentsOf: self.map { $0.existentialOutputComponent(from: input) }) + + if caps.count == 1 { + return input + } + return TypeConstruction.tuple(of: caps) } diff --git a/Sources/_StringProcessing/Executor.swift b/Sources/_StringProcessing/Executor.swift index 4f428cf06..416ddc1d5 100644 --- a/Sources/_StringProcessing/Executor.swift +++ b/Sources/_StringProcessing/Executor.swift @@ -50,7 +50,6 @@ struct Executor { caps.isEmpty { value = cpu.registers.values.first - assert(value != nil, "hmm, what would this mean?") } else { value = nil } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 5d2101afe..5cca29eef 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -29,7 +29,9 @@ public protocol RegexComponent { @available(SwiftStdlib 5.7, *) public struct Regex: RegexComponent { let program: Program - + + var outputTransform: ((Any) -> Output)? + var hasCapture: Bool { program.tree.hasCapture } diff --git a/Sources/_StringProcessing/Regex/MapOutput.swift b/Sources/_StringProcessing/Regex/MapOutput.swift new file mode 100644 index 000000000..10912b92b --- /dev/null +++ b/Sources/_StringProcessing/Regex/MapOutput.swift @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@available(SwiftStdlib 5.7, *) +extension Regex { + @available(SwiftStdlib 5.7, *) + public func mapOutput( + _ body: @escaping (Output) -> NewOutput + ) -> Regex { + let transform: (Any) -> NewOutput = { + if let previousTransform = outputTransform { + return body(previousTransform($0)) + } else { + return body($0 as! Output) + } + } + + var regex = Regex(node: root) + regex.outputTransform = transform + return regex + } +} diff --git a/Sources/_StringProcessing/Regex/Match.swift b/Sources/_StringProcessing/Regex/Match.swift index 78c9c8c9f..e27df083b 100644 --- a/Sources/_StringProcessing/Regex/Match.swift +++ b/Sources/_StringProcessing/Regex/Match.swift @@ -23,6 +23,8 @@ extension Regex { public let range: Range let value: Any? + + var transform: ((Any) -> Output)? } } @@ -42,9 +44,6 @@ extension Regex.Match { ) return output as! Output - } else if Output.self == Substring.self { - // FIXME: Plumb whole match (`.0`) through the matching engine. - return anyRegexOutput.input[range] as! Output } else if anyRegexOutput.isEmpty, value != nil { // FIXME: This is a workaround for whole-match values not // being modeled as part of captures. We might want to @@ -57,7 +56,12 @@ extension Regex.Match { let typeErasedMatch = anyRegexOutput.existentialOutput( from: anyRegexOutput.input[range] ) - return typeErasedMatch as! Output + + if let transform = transform { + return transform(typeErasedMatch) + } else { + return typeErasedMatch as! Output + } } } @@ -144,7 +148,9 @@ extension Regex { mode: MatchMode = .wholeString ) throws -> Regex.Match? { let executor = Executor(program: regex.program.loweredProgram) - return try executor.match(input, in: inputRange, mode) + var match: Match? = try executor.match(input, in: inputRange, mode) + match?.transform = outputTransform + return match } func _firstMatch( @@ -157,7 +163,8 @@ extension Regex { var low = inputRange.lowerBound let high = inputRange.upperBound while true { - if let m = try _match(input, in: low..= high { return nil } diff --git a/Tests/RegexTests/MapOutputTests.swift b/Tests/RegexTests/MapOutputTests.swift new file mode 100644 index 000000000..801f8bc5d --- /dev/null +++ b/Tests/RegexTests/MapOutputTests.swift @@ -0,0 +1,51 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift.org open source project +// +// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +@testable import _RegexParser + +import XCTest +@testable import _StringProcessing + +enum Transaction: String { + case credit + case debit +} + +extension RegexTests { + func testMapOutput() throws { + let regex0: Regex<(Substring, Substring)> = try Regex(#"Transaction: (credit|debit)"#) + let string0 = "Transaction: credit" + + let regex1 = regex0.mapOutput { + ($0, transaction: Transaction(rawValue: String($1))) + } + + let match0 = try XCTUnwrap(string0.firstMatch(of: regex1)?.output) + XCTAssertTrue(match0 == ("Transaction: credit", .credit)) + + let regex2 = regex1.mapOutput { + $1 + } + + let match1 = try XCTUnwrap(string0.firstMatch(of: regex2)?.output) + XCTAssertEqual(match1, .credit) + + let regex3: Regex = try Regex(#"Hello"#) + let string1 = "Hello" + + let regex4 = regex3.mapOutput { + "\($0) world!"[...] + } + + let match2 = try XCTUnwrap(string1.firstMatch(of: regex4)?.output) + XCTAssertEqual(match2, "Hello world!") + } +}