Skip to content

Implement Regex.mapOutput #455

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Sources/_StringProcessing/Capture.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ extension Sequence where Element == AnyRegexOutput.Element {
caps.append(contentsOf: self.map {
$0.existentialOutputComponent(from: input)
})

if caps.count == 1 {
return input
}

return TypeConstruction.tuple(of: caps)
}

Expand Down
1 change: 0 additions & 1 deletion Sources/_StringProcessing/Executor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ struct Executor {
caps.isEmpty
{
value = cpu.registers.values.first
assert(value != nil, "hmm, what would this mean?")
} else {
value = nil
}
Expand Down
4 changes: 3 additions & 1 deletion Sources/_StringProcessing/Regex/Core.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ public protocol RegexComponent {
@available(SwiftStdlib 5.7, *)
public struct Regex<Output>: RegexComponent {
let program: Program


var outputTransform: ((Any) -> Output)?

var hasCapture: Bool {
program.tree.hasCapture
}
Expand Down
30 changes: 30 additions & 0 deletions Sources/_StringProcessing/Regex/MapOutput.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

@available(SwiftStdlib 5.7, *)
extension Regex {
  /// Returns a regex built from this regex's `root` node whose output is
  /// obtained by passing this regex's output through `body`.
  ///
  /// If this regex already carries an `outputTransform`, the new transform
  /// runs that one first and hands its result to `body`; otherwise the
  /// type-erased value is force-cast to `Output` before `body` is called.
  ///
  /// - Parameter body: Converts a value of this regex's `Output` type into
  ///   `NewOutput`.
  /// - Returns: A `Regex<NewOutput>` with the composed transform stored in
  ///   its `outputTransform`.
  @available(SwiftStdlib 5.7, *)
  public func mapOutput<NewOutput>(
    _ body: @escaping (Output) -> NewOutput
  ) -> Regex<NewOutput> {
    var mapped = Regex<NewOutput>(node: root)
    mapped.outputTransform = { erased in
      guard let earlier = outputTransform else {
        // No earlier mapping: the erased value is expected to be `Output`.
        return body(erased as! Output)
      }
      return body(earlier(erased))
    }
    return mapped
  }
}
19 changes: 13 additions & 6 deletions Sources/_StringProcessing/Regex/Match.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ extension Regex {
public let range: Range<String.Index>

let value: Any?

var transform: ((Any) -> Output)?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the tradeoff of storing `transform` here versus applying it once when the `Match` object is created?

}
}

Expand All @@ -42,9 +44,6 @@ extension Regex.Match {
)

return output as! Output
} else if Output.self == Substring.self {
// FIXME: Plumb whole match (`.0`) through the matching engine.
return anyRegexOutput.input[range] as! Output
} else if anyRegexOutput.isEmpty, value != nil {
// FIXME: This is a workaround for whole-match values not
// being modeled as part of captures. We might want to
Expand All @@ -57,7 +56,12 @@ extension Regex.Match {
let typeErasedMatch = anyRegexOutput.existentialOutput(
from: anyRegexOutput.input[range]
)
return typeErasedMatch as! Output

if let transform = transform {
return transform(typeErasedMatch)
} else {
return typeErasedMatch as! Output
}
}
}

Expand Down Expand Up @@ -144,7 +148,9 @@ extension Regex {
mode: MatchMode = .wholeString
) throws -> Regex<Output>.Match? {
let executor = Executor(program: regex.program.loweredProgram)
return try executor.match(input, in: inputRange, mode)
var match: Match? = try executor.match(input, in: inputRange, mode)
match?.transform = outputTransform
return match
}

func _firstMatch(
Expand All @@ -157,7 +163,8 @@ extension Regex {
var low = inputRange.lowerBound
let high = inputRange.upperBound
while true {
if let m = try _match(input, in: low..<high, mode: .partialFromFront) {
if var m = try _match(input, in: low..<high, mode: .partialFromFront) {
m.transform = outputTransform
return m
}
if low >= high { return nil }
Expand Down
51 changes: 51 additions & 0 deletions Tests/RegexTests/MapOutputTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

@testable import _RegexParser

import XCTest
@testable import _StringProcessing

/// Transaction kinds used by the `mapOutput` tests; each case's raw value
/// is its own name.
enum Transaction: String {
  case credit, debit
}

extension RegexTests {
/// Checks `Regex.mapOutput` on a regex with one capture, on a regex that
/// has already been mapped, and on a capture-less `Regex<Substring>`.
func testMapOutput() throws {
let regex0: Regex<(Substring, Substring)> = try Regex(#"Transaction: (credit|debit)"#)
let string0 = "Transaction: credit"

// Keep the whole match and turn the captured word into a `Transaction?`
// through its raw value.
let regex1 = regex0.mapOutput {
($0, transaction: Transaction(rawValue: String($1)))
}

let match0 = try XCTUnwrap(string0.firstMatch(of: regex1)?.output)
XCTAssertTrue(match0 == ("Transaction: credit", .credit))

// Map an already-mapped regex: the closure receives regex1's output and
// keeps only its second element.
let regex2 = regex1.mapOutput {
$1
}

let match1 = try XCTUnwrap(string0.firstMatch(of: regex2)?.output)
XCTAssertEqual(match1, .credit)

// The output type stays `Substring` here; the assertion at the end expects
// the mapped text rather than the bare matched text.
let regex3: Regex<Substring> = try Regex(#"Hello"#)
let string1 = "Hello"

let regex4 = regex3.mapOutput {
"\($0) world!"[...]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Curious here — does this cause a problem if you leave off the [...]?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously if the output type was just Substring it would've returned only the whole match instead of actually transforming the output, so I'm testing that even with just a Substring output type we still attempt to transform.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we also add a test of the behavior when dropping an output-mapped regex in a builder block?

}

let match2 = try XCTUnwrap(string1.firstMatch(of: regex4)?.output)
XCTAssertEqual(match2, "Hello world!")
}
}