Skip to content

Persist persistent state in the processor. #773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ let package = Package(
name: "VariadicsGenerator",
targets: ["VariadicsGenerator"]),
// Disable to work around rdar://126877024
// .executable(
// name: "RegexBenchmark",
// targets: ["RegexBenchmark"])
.executable(
name: "RegexBenchmark",
targets: ["RegexBenchmark"])
],
dependencies: [
.package(url: "https://github.com/apple/swift-argument-parser", from: "1.0.0"),
Expand Down Expand Up @@ -143,17 +143,17 @@ let package = Package(
"_StringProcessing"
],
swiftSettings: [availabilityDefinition]),
// .executableTarget(
// name: "RegexBenchmark",
// dependencies: [
// .product(name: "ArgumentParser", package: "swift-argument-parser"),
// "_RegexParser",
// "_StringProcessing",
// "RegexBuilder"
// ],
// swiftSettings: [
// .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
// ]),
.executableTarget(
name: "RegexBenchmark",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser"),
"_RegexParser",
"_StringProcessing",
"RegexBuilder"
],
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
]),

// MARK: Exercises
.target(
Expand Down
11 changes: 8 additions & 3 deletions Sources/RegexBenchmark/Benchmark.swift
Original file line number Diff line number Diff line change
Expand Up @@ -153,29 +153,34 @@ struct CrossBenchmark {
/// Whether to also run scalar-semantic mode
var alsoRunScalarSemantic: Bool = true

/// Whether to also run a simple-word-boundaries variant. Off by default;
/// the value is forwarded unchanged to `registerCrossBenchmark`.
var alsoRunSimpleWordBoundaries: Bool = false

/// Registers this cross-benchmark with `runner`.
///
/// - When `isWhole` is set, a single `.whole` benchmark is registered.
/// - Otherwise an `.allMatches` benchmark is registered, followed by a
///   `.first` benchmark when either `includeFirst` or the runner's
///   `includeFirstOverride` is set.
///
/// Every registration forwards the same base name, input, pattern and the
/// `alsoRunScalarSemantic` / `alsoRunSimpleWordBoundaries` flags.
///
/// - Parameter runner: The benchmark runner that receives the registrations.
func register(_ runner: inout BenchmarkRunner) {
  // All registrations differ only in the match type, so the shared argument
  // list lives in one place.
  func add(_ type: Benchmark.MatchType, to runner: inout BenchmarkRunner) {
    runner.registerCrossBenchmark(
      nameBase: baseName,
      input: input,
      pattern: regex,
      type,
      alsoRunScalarSemantic: alsoRunScalarSemantic,
      alsoRunSimpleWordBoundaries: alsoRunSimpleWordBoundaries)
  }

  guard !isWhole else {
    add(.whole, to: &runner)
    return
  }

  add(.allMatches, to: &runner)

  // The override is read after the `.allMatches` registration, as before.
  if includeFirst || runner.includeFirstOverride {
    add(.first, to: &runner)
  }
}
Expand Down
2 changes: 2 additions & 0 deletions Sources/RegexBenchmark/BenchmarkRegistration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ extension BenchmarkRunner {
self.addDiceNotation()
self.addErrorMessages()
self.addIpAddress()

self.addURLWithWordBoundaries()
// -- end of registrations --
}
}
13 changes: 12 additions & 1 deletion Sources/RegexBenchmark/BenchmarkRunner.swift
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ struct BenchmarkRunner {
input: String,
pattern: String,
_ type: Benchmark.MatchType,
alsoRunScalarSemantic: Bool = true
alsoRunScalarSemantic: Bool = true,
alsoRunSimpleWordBoundaries: Bool
) {
let swiftRegex = try! Regex(pattern)
let nsRegex: NSRegularExpression
Expand All @@ -58,6 +59,16 @@ struct BenchmarkRunner {
type: .init(type),
target: input))

if alsoRunSimpleWordBoundaries {
register(
Benchmark(
name: nameBase + nameSuffix + "_SimpleWordBoundaries",
regex: swiftRegex.wordBoundaryKind(.simple),
pattern: pattern,
type: type,
target: input))
}

if alsoRunScalarSemantic {
register(
Benchmark(
Expand Down
11 changes: 9 additions & 2 deletions Sources/RegexBenchmark/CLI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ struct Runner: ParsableCommand {

@Flag(help: "Exclude running NSRegex benchmarks")
var excludeNs = false


@Flag(help: "Rather than specify specific-benchmarks as patterns, use exact names")
var exactName = false

@Flag(help: """
Enable tracing of the engine (warning: lots of output). Prints out processor state each cycle

Expand Down Expand Up @@ -73,7 +76,11 @@ swift build -c release -Xswiftc -DPROCESSOR_MEASUREMENTS_ENABLED
if !self.specificBenchmarks.isEmpty {
runner.suite = runner.suite.filter { b in
specificBenchmarks.contains { pattern in
try! Regex(pattern).firstMatch(in: b.name) != nil
if exactName {
return pattern == b.name
}

return try! Regex(pattern).firstMatch(in: b.name) != nil
}
}
}
Expand Down
22 changes: 22 additions & 0 deletions Sources/RegexBenchmark/Inputs/URL.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
extension Inputs {
  /// Benchmark input for URL matching.
  ///
  /// A short text sample containing an `http` URL, an `https` URL, an `ftp`
  /// link, a scheme-less link, ordinary prose and emoji, concatenated 30
  /// times with no separator between copies.
  static let url: String = {
    let repetitions = 30
    let sample = """
      Item 1 | Item 2® •Item 3 Item4


      \t\t\t

      Check it out here: http://www.test.com/this-is-a-fake-url-that-should-be-replaced?a=1
      Check it out here: https://www.test.com/this-is-a-fake-url-that-should-be-replaced?a=1
      This is not a web link ftp://user@host:domain.com/path
      This is a link without a scheme www.apple.com/mac

      This is some good text and should not be removed.
      Thanks.
      😀🩷🤵🏿
      """
    return String(repeating: sample, count: repetitions)
  }()

}
14 changes: 14 additions & 0 deletions Sources/RegexBenchmark/Suite/URLRegex.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import _StringProcessing

extension BenchmarkRunner {
  /// Registers the "URLWithWordBoundaries" cross-benchmark, which runs an
  /// `http`/`https` URL pattern containing a `\b` word-boundary assertion
  /// over `Inputs.url`, with the simple-word-boundaries variant enabled.
  mutating func addURLWithWordBoundaries() {
    let pattern = #"https?://([-a-zA-Z0-9@:%._+~#=]{1,256}\.[a-zA-Z0-9()]{1,6})\b[-a-zA-Z0-9()@:%_+.~#?&=]*"#

    CrossBenchmark(
      baseName: "URLWithWordBoundaries",
      regex: pattern,
      input: Inputs.url,
      alsoRunSimpleWordBoundaries: true
    ).register(&self)
  }
}
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Algorithms/Algorithms/Ranges.swift
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ struct RegexRangesSequence<Output> {
regex: Regex<Output>
) {
self.base = .init(
program: regex.regex.program.loweredProgram,
input: input,
subjectBounds: subjectBounds,
searchBounds: searchBounds,
regex: regex)
searchBounds: searchBounds)
}
}

Expand Down
85 changes: 4 additions & 81 deletions Sources/_StringProcessing/Algorithms/Matching/Matches.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,85 +12,7 @@
// MARK: Regex algorithms

/// The stored state needed to enumerate the matches of a regex in a string:
/// the string itself, the subject and search ranges within it, and the regex.
///
/// The initializer is the compiler-synthesized memberwise one, whose labels
/// and order are `input:subjectBounds:searchBounds:regex:`.
@available(SwiftStdlib 5.7, *)
struct RegexMatchesSequence<Output> {
  /// The string being searched.
  let input: String

  /// The range of `input` treated as the subject of the match.
  let subjectBounds: Range<String.Index>

  /// The range of `input` in which matches are searched for.
  let searchBounds: Range<String.Index>

  /// The regex whose matches are enumerated.
  let regex: Regex<Output>
}

@available(SwiftStdlib 5.7, *)
extension RegexMatchesSequence: Sequence {
  /// Returns the position at which the search following `match` should
  /// start, or `nil` when there is nowhere left to search.
  fileprivate func searchIndex(after match: Regex<Output>.Match) -> String.Index? {
    let matched = match.range

    // A non-empty match: resume immediately after it.
    guard matched.isEmpty else {
      return matched.upperBound
    }

    // An empty match at (or past) the end of the subject ends the iteration.
    guard matched.lowerBound < subjectBounds.upperBound else {
      return nil
    }

    // An empty match elsewhere: step forward by one element so the next
    // search starts at a new position. The step is one Character or one
    // Unicode scalar, depending on the regex's semantic level.
    let resumePoint = matched.upperBound
    switch regex.initialOptions.semanticLevel {
    case .graphemeCluster:
      return input.index(after: resumePoint)
    case .unicodeScalar:
      return input.unicodeScalars.index(after: resumePoint)
    }
  }

  /// Produces matches one at a time, searching from `currentPosition`.
  struct Iterator: IteratorProtocol {
    let base: RegexMatchesSequence

    // Neither read nor written anywhere in this extension.
    var initialIteration = true

    // Where the next search begins. Becomes `nil` once iteration is finished;
    // a separate "finished" state is needed because some regexes can
    // empty-match at the end of the subject.
    var currentPosition: String.Index?

    init(_ matches: RegexMatchesSequence) {
      base = matches
      currentPosition = matches.subjectBounds.lowerBound
    }

    mutating func next() -> Regex<Output>.Match? {
      // No position left means iteration has completed.
      guard let start = currentPosition else {
        return nil
      }

      let limit = base.searchBounds.upperBound
      guard start <= limit else {
        return nil
      }

      // Search the remainder of the search range. A thrown error is treated
      // the same as "no match".
      guard let found = try? base.regex._firstMatch(
        base.input,
        subjectBounds: base.subjectBounds,
        searchBounds: start..<limit)
      else {
        currentPosition = nil
        return nil
      }

      currentPosition = base.searchIndex(after: found)
      return found
    }
  }

  func makeIterator() -> Iterator {
    return Iterator(self)
  }
}
/// Keeps the `RegexMatchesSequence<Output>` name available as an alias for
/// `Executor<Output>.Matches`.
typealias RegexMatchesSequence<Output> = Executor<Output>.Matches

extension BidirectionalCollection where SubSequence == Substring {
@available(SwiftStdlib 5.7, *)
Expand All @@ -99,10 +21,10 @@ extension BidirectionalCollection where SubSequence == Substring {
of regex: R
) -> RegexMatchesSequence<R.RegexOutput> {
RegexMatchesSequence(
program: regex.regex.program.loweredProgram,
input: self[...].base,
subjectBounds: startIndex..<endIndex,
searchBounds: startIndex..<endIndex,
regex: regex.regex)
searchBounds: startIndex..<endIndex)
}

// FIXME: Return `some Collection<Regex<R.Output>.Match> for SE-0346
Expand All @@ -116,6 +38,7 @@ extension BidirectionalCollection where SubSequence == Substring {
// FIXME: Array init calls count, which double-executes the regex :-(
// FIXME: just return some Collection<Regex<Output>.Match>
var result = Array<Regex<Output>.Match>()

for match in _matches(of: r) {
result.append(match)
}
Expand Down
2 changes: 0 additions & 2 deletions Sources/_StringProcessing/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ add_library(_StringProcessing
Algorithms/Searchers/CollectionSearcher.swift
Algorithms/Searchers/ZSearcher.swift
Engine/Backtracking.swift
Engine/Consume.swift
Engine/Engine.swift
Engine/InstPayload.swift
Engine/Instruction.swift
Engine/MEBuilder.swift
Expand Down
4 changes: 2 additions & 2 deletions Sources/_StringProcessing/Compiler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func _compileRegex(
_ regex: String,
_ syntax: SyntaxOptions = .traditional,
_ semanticLevel: RegexSemanticLevel? = nil
) throws -> Executor {
) throws -> MEProgram {
let ast = try parse(regex, syntax)
let dsl: DSLTree

Expand All @@ -104,7 +104,7 @@ func _compileRegex(
dsl = ast.dslTree
}
let program = try Compiler(tree: dsl).emit()
return Executor(program: program)
return program
}

@_spi(RegexBenchmark)
Expand Down
58 changes: 0 additions & 58 deletions Sources/_StringProcessing/Engine/Consume.swift

This file was deleted.

Loading