diff --git a/Package.swift b/Package.swift index f02ef1828..1047ad7d8 100644 --- a/Package.swift +++ b/Package.swift @@ -11,7 +11,11 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([ "-Xfrontend", "-define-availability", "-Xfrontend", - "SwiftStdlib 5.8:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", + "SwiftStdlib 5.8:macOS 13.3, iOS 16.4, watchOS 9.4, tvOS 16.4", + "-Xfrontend", + "-define-availability", + "-Xfrontend", + "SwiftStdlib 5.9:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", ]) /// Swift settings for building a private stdlib-like module that is to be used @@ -128,7 +132,8 @@ let package = Package( .product(name: "ArgumentParser", package: "swift-argument-parser"), "_RegexParser", "_StringProcessing" - ]), + ], + swiftSettings: [availabilityDefinition]), .executableTarget( name: "RegexBenchmark", dependencies: [ diff --git a/Sources/RegexTester/RegexTester.swift b/Sources/RegexTester/RegexTester.swift index 970e47160..052de667f 100644 --- a/Sources/RegexTester/RegexTester.swift +++ b/Sources/RegexTester/RegexTester.swift @@ -14,7 +14,7 @@ import _RegexParser import _StringProcessing @main -@available(macOS 9999, *) +@available(SwiftStdlib 5.8, *) struct RegexTester: ParsableCommand { typealias MatchFunctionType = (String) throws -> Regex.Match? diff --git a/Sources/_RegexParser/Regex/AST/MatchingOptions.swift b/Sources/_RegexParser/Regex/AST/MatchingOptions.swift index 89d4c92a7..be288491d 100644 --- a/Sources/_RegexParser/Regex/AST/MatchingOptions.swift +++ b/Sources/_RegexParser/Regex/AST/MatchingOptions.swift @@ -44,7 +44,7 @@ extension AST { // Swift-only default possessive quantifier case possessiveByDefault // t.b.d. 
- + // NSRegularExpression compatibility special-case case nsreCompatibleDot // no AST representation } diff --git a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift index 2908c4dc5..e870e1493 100644 --- a/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift +++ b/Sources/_StringProcessing/Algorithms/Algorithms/Trim.swift @@ -81,13 +81,11 @@ extension RangeReplaceableCollection { // MARK: Fixed pattern algorithms extension Collection where Element: Equatable { - /// Returns a new collection of the same type by removing initial elements - /// that satisfy the given predicate from the start. - /// - Parameter predicate: A closure that takes an element of the sequence - /// as its argument and returns a Boolean value indicating whether the - /// element should be removed from the collection. + /// Returns a new collection of the same type by removing `prefix` from the start + /// of the collection. + /// - Parameter prefix: The collection to remove from this collection. /// - Returns: A collection containing the elements of the collection that are - /// not removed by `predicate`. + /// not removed by `prefix`. @available(SwiftStdlib 5.7, *) public func trimmingPrefix( _ prefix: Prefix @@ -97,11 +95,8 @@ extension Collection where Element: Equatable { } extension Collection where SubSequence == Self, Element: Equatable { - /// Removes the initial elements that satisfy the given predicate from the - /// start of the sequence. - /// - Parameter predicate: A closure that takes an element of the sequence - /// as its argument and returns a Boolean value indicating whether the - /// element should be removed from the collection. + /// Removes `prefix` from the start of the collection. + /// - Parameter prefix: The collection to remove from this collection. 
@available(SwiftStdlib 5.7, *) public mutating func trimPrefix( _ prefix: Prefix @@ -111,11 +106,8 @@ extension Collection where SubSequence == Self, Element: Equatable { } extension RangeReplaceableCollection where Element: Equatable { - /// Removes the initial elements that satisfy the given predicate from the - /// start of the sequence. - /// - Parameter predicate: A closure that takes an element of the sequence - /// as its argument and returns a Boolean value indicating whether the - /// element should be removed from the collection. + /// Removes `prefix` from the start of the collection. + /// - Parameter prefix: The collection to remove from this collection. @available(SwiftStdlib 5.7, *) public mutating func trimPrefix( _ prefix: Prefix @@ -127,11 +119,11 @@ extension RangeReplaceableCollection where Element: Equatable { // MARK: Regex algorithms extension BidirectionalCollection where SubSequence == Substring { - /// Returns a new collection of the same type by removing `prefix` from the - /// start. - /// - Parameter prefix: The collection to remove from this collection. + /// Returns a new collection of the same type by removing the initial elements + /// that match the given regex. + /// - Parameter regex: The regex to remove from this collection. /// - Returns: A collection containing the elements that does not match - /// `prefix` from the start. + /// `regex` from the start. @_disfavoredOverload @available(SwiftStdlib 5.7, *) public func trimmingPrefix(_ regex: some RegexComponent) -> SubSequence { diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index 8f2a52a3c..5e85c7b3d 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -465,16 +465,16 @@ fileprivate extension Compiler.ByteCodeGen { assert(high != 0) assert((0...(high ?? Int.max)).contains(low)) - let extraTrips: Int? + let maxExtraTrips: Int? 
if let h = high { - extraTrips = h - low + maxExtraTrips = h - low } else { - extraTrips = nil + maxExtraTrips = nil } let minTrips = low - assert((extraTrips ?? 1) >= 0) + assert((maxExtraTrips ?? 1) >= 0) - if tryEmitFastQuant(child, updatedKind, minTrips, extraTrips) { + if tryEmitFastQuant(child, updatedKind, minTrips, maxExtraTrips) { return } @@ -492,19 +492,19 @@ fileprivate extension Compiler.ByteCodeGen { decrement %minTrips and fallthrough loop-body: - : + : mov currentPosition %pos evaluate the subexpression - : + : if %pos is currentPosition: goto exit goto min-trip-count control block exit-policy control block: - if %extraTrips is zero: + if %maxExtraTrips is zero: goto exit else: - decrement %extraTrips and fallthrough + decrement %maxExtraTrips and fallthrough : save exit and goto loop-body @@ -531,12 +531,12 @@ fileprivate extension Compiler.ByteCodeGen { /* fallthrough */ """ - // Specialization based on `extraTrips` for 0 or unbounded + // Specialization based on `maxExtraTrips` for 0 or unbounded _ = """ exit-policy control block: - : + : goto exit - : + : /* fallthrough */ """ @@ -569,12 +569,12 @@ fileprivate extension Compiler.ByteCodeGen { minTripsReg = nil } - let extraTripsReg: IntRegister? - if (extraTrips ?? 0) > 0 { - extraTripsReg = builder.makeIntRegister( - initialValue: extraTrips!) + let maxExtraTripsReg: IntRegister? + if (maxExtraTrips ?? 0) > 0 { + maxExtraTripsReg = builder.makeIntRegister( + initialValue: maxExtraTrips!) } else { - extraTripsReg = nil + maxExtraTripsReg = nil } // Set up a dummy save point for possessive to update @@ -606,7 +606,7 @@ fileprivate extension Compiler.ByteCodeGen { let startPosition: PositionRegister? 
let emitPositionChecking = (!optimizationsEnabled || !child.guaranteesForwardProgress) && - extraTrips == nil + maxExtraTrips == nil if emitPositionChecking { startPosition = builder.makePositionRegister() @@ -616,7 +616,7 @@ fileprivate extension Compiler.ByteCodeGen { } try emitNode(child) if emitPositionChecking { - // in all quantifier cases, no matter what minTrips or extraTrips is, + // in all quantifier cases, no matter what minTrips or maxExtraTrips is, // if we have a successful non-advancing match, branch to exit because it // can match an arbitrary number of times builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!) @@ -629,20 +629,20 @@ fileprivate extension Compiler.ByteCodeGen { } // exit-policy: - // condBranch(to: exit, ifZeroElseDecrement: %extraTrips) + // condBranch(to: exit, ifZeroElseDecrement: %maxExtraTrips) // // // Bool { let isScalarSemantics = options.semanticLevel == .unicodeScalar guard optimizationsEnabled && minTrips <= QuantifyPayload.maxStorableTrips - && extraTrips ?? 0 <= QuantifyPayload.maxStorableTrips + && maxExtraTrips ?? 
0 <= QuantifyPayload.maxStorableTrips && kind != .reluctant else { return false } @@ -687,7 +687,7 @@ fileprivate extension Compiler.ByteCodeGen { guard let bitset = ccc.asAsciiBitset(options) else { return false } - builder.buildQuantify(bitset: bitset, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + builder.buildQuantify(bitset: bitset, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .atom(let atom): switch atom { @@ -696,17 +696,17 @@ fileprivate extension Compiler.ByteCodeGen { guard let val = c._singleScalarAsciiValue else { return false } - builder.buildQuantify(asciiChar: val, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + builder.buildQuantify(asciiChar: val, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .any: builder.buildQuantifyAny( - matchesNewlines: true, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: true, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .anyNonNewline: builder.buildQuantifyAny( - matchesNewlines: false, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: false, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .dot: builder.buildQuantifyAny( - matchesNewlines: options.dotMatchesNewline, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics) + matchesNewlines: options.dotMatchesNewline, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics) case .characterClass(let cc): // Custom character class that consumes a single grapheme @@ -715,19 +715,19 @@ fileprivate extension Compiler.ByteCodeGen { model: model, kind, minTrips, - extraTrips, + maxExtraTrips, isScalarSemantics: isScalarSemantics) default: return false } case .convertedRegexLiteral(let node, _): - return tryEmitFastQuant(node, kind, minTrips, extraTrips) + return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips) case 
.nonCapturingGroup(let groupKind, let node): // .nonCapture nonCapturingGroups are ignored during compilation guard groupKind.ast == .nonCapture else { return false } - return tryEmitFastQuant(node, kind, minTrips, extraTrips) + return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips) default: return false } diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift index 48470ce91..11e2db0e4 100644 --- a/Sources/_StringProcessing/Engine/Backtracking.swift +++ b/Sources/_StringProcessing/Engine/Backtracking.swift @@ -13,9 +13,9 @@ extension Processor { struct SavePoint { var pc: InstructionAddress var pos: Position? + // Quantifiers may store a range of positions to restore to - var rangeStart: Position? - var rangeEnd: Position? + var quantifiedRange: Range? // FIXME: refactor, for now this field is only used for quantifier save // points. We should try to separate out the concerns better. @@ -49,64 +49,70 @@ extension Processor { return (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters) } - var rangeIsEmpty: Bool { rangeEnd == nil } - - mutating func updateRange(newEnd: Input.Index) { - if rangeStart == nil { - rangeStart = newEnd - } - rangeEnd = newEnd + // Whether this save point is quantified, meaning it has a range of + // possible positions to explore. + var isQuantified: Bool { + quantifiedRange != nil } /// Move the next range position into pos, and removing it from the range - mutating func takePositionFromRange(_ input: Input) { - assert(!rangeIsEmpty) - pos = rangeEnd! - shrinkRange(input) - } + mutating func takePositionFromQuantifiedRange(_ input: Input) { + assert(isQuantified) + let range = quantifiedRange! 
+ pos = range.upperBound + if range.isEmpty { + // Becomes a normal save point + quantifiedRange = nil + return + } - /// Shrink the range of the save point by one index, essentially dropping the last index - mutating func shrinkRange(_ input: Input) { - assert(!rangeIsEmpty) - if rangeEnd == rangeStart { - // The range is now empty - rangeStart = nil - rangeEnd = nil + // Shrink the range + let newUpper: Position + if isScalarSemantics { + newUpper = input.unicodeScalars.index(before: range.upperBound) } else { - if isScalarSemantics { - input.unicodeScalars.formIndex(before: &rangeEnd!) - } else { - input.formIndex(before: &rangeEnd!) - } + newUpper = input.index(before: range.upperBound) } + quantifiedRange = range.lowerBound.. SavePoint { + SavePoint( + pc: pc, + pos: currentPosition, + quantifiedRange: nil, + isScalarSemantics: false, + stackEnd: .init(callStack.count), + captureEnds: storedCaptures, + intRegisters: registers.ints, + posRegisters: registers.positions) + } + + func makeAddressOnlySavePoint( + resumingAt pc: InstructionAddress ) -> SavePoint { SavePoint( pc: pc, - pos: addressOnly ? 
nil : currentPosition, - rangeStart: nil, - rangeEnd: nil, - isScalarSemantics: false, // FIXME: refactor away + pos: nil, + quantifiedRange: nil, + isScalarSemantics: false, stackEnd: .init(callStack.count), captureEnds: storedCaptures, intRegisters: registers.ints, posRegisters: registers.positions) } - - func startQuantifierSavePoint( + + func makeQuantifiedSavePoint( + _ range: Range, isScalarSemantics: Bool ) -> SavePoint { - // Restores to the instruction AFTER the current quantifier instruction SavePoint( pc: controller.pc + 1, pos: nil, - rangeStart: nil, - rangeEnd: nil, + quantifiedRange: range, isScalarSemantics: isScalarSemantics, stackEnd: .init(callStack.count), captureEnds: storedCaptures, diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift index a0e849851..78baf9ce1 100644 --- a/Sources/_StringProcessing/Engine/InstPayload.swift +++ b/Sources/_StringProcessing/Engine/InstPayload.swift @@ -378,7 +378,7 @@ extension Instruction.Payload { struct QuantifyPayload: RawRepresentable { let rawValue: UInt64 enum PayloadType: UInt64 { - case bitset = 0 + case asciiBitset = 0 case asciiChar = 1 case any = 2 case builtin = 4 @@ -392,18 +392,18 @@ struct QuantifyPayload: RawRepresentable { // b39-b38 - isScalarSemantics // b38-b35 - Payload type (one of 4 types, stored on 3 bits) // b35-b27 - minTrips (8 bit int) - // b27-b18 - extraTrips (8 bit value, one bit for nil) + // b27-b18 - maxExtraTrips (8 bit value, one bit for nil) // b18-b16 - Quantification type (one of three types) // b16-b0 - Payload value (depends on payload type) static var quantKindShift: UInt64 { 16 } - static var extraTripsShift: UInt64 { 18 } + static var maxExtraTripsShift: UInt64 { 18 } static var minTripsShift: UInt64 { 27 } static var typeShift: UInt64 { 35 } static var maxStorableTrips: UInt64 { (1 << 8) - 1 } static var isScalarSemanticsBit: UInt64 { 1 &<< 38 } var quantKindMask: UInt64 { 3 } - var extraTripsMask: 
UInt64 { 0x1FF } + var maxExtraTripsMask: UInt64 { 0x1FF } var minTripsMask: UInt64 { 0xFF } var typeMask: UInt64 { 7 } var payloadMask: UInt64 { 0xFF_FF } @@ -411,7 +411,7 @@ struct QuantifyPayload: RawRepresentable { static func packInfoValues( _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, _ type: PayloadType, isScalarSemantics: Bool ) -> UInt64 { @@ -425,10 +425,10 @@ struct QuantifyPayload: RawRepresentable { kindVal = 2 } // TODO: refactor / reimplement - let extraTripsVal: UInt64 = extraTrips == nil ? 1 : UInt64(extraTrips!) << 1 + let maxExtraTripsVal: UInt64 = maxExtraTrips == nil ? 1 : UInt64(maxExtraTrips!) << 1 let scalarSemanticsBit = isScalarSemantics ? Self.isScalarSemanticsBit : 0 return (kindVal << QuantifyPayload.quantKindShift) | - (extraTripsVal << QuantifyPayload.extraTripsShift) | + (maxExtraTripsVal << QuantifyPayload.maxExtraTripsShift) | (UInt64(minTrips) << QuantifyPayload.minTripsShift) | (type.rawValue << QuantifyPayload.typeShift) | scalarSemanticsBit @@ -443,41 +443,41 @@ struct QuantifyPayload: RawRepresentable { bitset: AsciiBitsetRegister, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { assert(bitset.bits <= _payloadMask) self.rawValue = bitset.bits - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .bitset, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .asciiBitset, isScalarSemantics: isScalarSemantics) } init( asciiChar: UInt8, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { self.rawValue = UInt64(asciiChar) - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .asciiChar, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .asciiChar, isScalarSemantics: isScalarSemantics) } init( matchesNewlines: 
Bool, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { self.rawValue = (matchesNewlines ? 1 : 0) - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .any, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .any, isScalarSemantics: isScalarSemantics) } init( model: _CharacterClassModel, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { assert(model.cc.rawValue < 0xFF) @@ -485,7 +485,7 @@ struct QuantifyPayload: RawRepresentable { + (model.isInverted ? 1 << 9 : 0) + (model.isStrictASCII ? 1 << 10 : 0) self.rawValue = packedModel - + QuantifyPayload.packInfoValues(kind, minTrips, extraTrips, .builtin, isScalarSemantics: isScalarSemantics) + + QuantifyPayload.packInfoValues(kind, minTrips, maxExtraTrips, .builtin, isScalarSemantics: isScalarSemantics) } var type: PayloadType { @@ -506,8 +506,8 @@ struct QuantifyPayload: RawRepresentable { (self.rawValue >> QuantifyPayload.minTripsShift) & minTripsMask } - var extraTrips: UInt64? { - let val = (self.rawValue >> QuantifyPayload.extraTripsShift) & extraTripsMask + var maxExtraTrips: UInt64? 
{ + let val = (self.rawValue >> QuantifyPayload.maxExtraTripsShift) & maxExtraTripsMask if val == 1 { return nil } else { diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift index e26a00fb1..44c938e71 100644 --- a/Sources/_StringProcessing/Engine/MEBuilder.swift +++ b/Sources/_StringProcessing/Engine/MEBuilder.swift @@ -225,48 +225,48 @@ extension MEProgram.Builder { bitset: DSLTree.CustomCharacterClass.AsciiBitset, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(bitset: makeAsciiBitset(bitset), kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantify( asciiChar: UInt8, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(asciiChar: asciiChar, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantifyAny( matchesNewlines: Bool, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(matchesNewlines: matchesNewlines, kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildQuantify( model: _CharacterClassModel, _ kind: AST.Quantification.Kind, _ minTrips: Int, - _ extraTrips: Int?, + _ maxExtraTrips: Int?, isScalarSemantics: 
Bool ) { instructions.append(.init( .quantify, - .init(quantify: .init(model: model,kind, minTrips, extraTrips, isScalarSemantics: isScalarSemantics)))) + .init(quantify: .init(model: model,kind, minTrips, maxExtraTrips, isScalarSemantics: isScalarSemantics)))) } mutating func buildAccept() { diff --git a/Sources/_StringProcessing/Engine/MECapture.swift b/Sources/_StringProcessing/Engine/MECapture.swift index 4bea21133..9bb4ecb06 100644 --- a/Sources/_StringProcessing/Engine/MECapture.swift +++ b/Sources/_StringProcessing/Engine/MECapture.swift @@ -76,10 +76,7 @@ extension Processor { currentCaptureBegin = nil } - mutating func registerValue( - _ value: Any, - overwriteInitial: SavePoint? = nil - ) { + mutating func registerValue(_ value: Any) { _invariantCheck() defer { _invariantCheck() } diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index c6f55ee34..a0480cde6 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -3,8 +3,8 @@ extension Processor { let isScalarSemantics = payload.isScalarSemantics switch payload.type { - case .bitset: - return input.matchBitset( + case .asciiBitset: + return input.matchASCIIBitset( registers[payload.bitset], at: currentPosition, limitedBy: end, @@ -46,109 +46,119 @@ extension Processor { /// Generic quantify instruction interpreter /// - Handles .eager and .posessive - /// - Handles arbitrary minTrips and extraTrips + /// - Handles arbitrary minTrips and maxExtraTrips mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + assert(payload.quantKind != .reluctant) + var trips = 0 - var extraTrips = payload.extraTrips - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) + var maxExtraTrips = payload.maxExtraTrips - while true { - if trips >= payload.minTrips { - if extraTrips == 0 { break } - extraTrips = extraTrips.map({$0 - 1}) - if 
payload.quantKind == .eager { - savePoint.updateRange(newEnd: currentPosition) - } + while trips < payload.minTrips { + guard let next = _doQuantifyMatch(payload) else { + signalFailure() + return false } - let next = _doQuantifyMatch(payload) - guard let idx = next else { - if !savePoint.rangeIsEmpty { - // The last save point has saved the current, non-matching position, - // so it's unneeded. - savePoint.shrinkRange(input) - } - break - } - currentPosition = idx + currentPosition = next trips += 1 } - if trips < payload.minTrips { - signalFailure() - return false + if maxExtraTrips == 0 { + // We're done + return true + } + + guard let next = _doQuantifyMatch(payload) else { + return true + } + maxExtraTrips = maxExtraTrips.map { $0 - 1 } + + // Remember the range of valid positions in case we can create a quantified + // save point + let rangeStart = currentPosition + var rangeEnd = currentPosition + currentPosition = next + + while true { + if maxExtraTrips == 0 { break } + + guard let next = _doQuantifyMatch(payload) else { + break + } + maxExtraTrips = maxExtraTrips.map({$0 - 1}) + rangeEnd = currentPosition + currentPosition = next } - if !savePoint.rangeIsEmpty { - savePoints.append(savePoint) + if payload.quantKind == .eager { + savePoints.append(makeQuantifiedSavePoint( + rangeStart.. 
Bool { + /// Specialized quantify instruction interpreter for `*`, always succeeds + mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { assert(payload.quantKind == .eager && payload.minTrips == 0 - && payload.extraTrips == nil) - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) + && payload.maxExtraTrips == nil) + _doRunEagerZeroOrMoreQuantify(payload) + } - while true { - savePoint.updateRange(newEnd: currentPosition) - let next = _doQuantifyMatch(payload) - guard let idx = next else { break } - currentPosition = idx + // NOTE: So-as to inline into one-or-more call, which makes a significant + // performance difference + @inline(__always) + mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { + guard let next = _doQuantifyMatch(payload) else { + // Consumed no input, no point saved + return } - // The last save point has saved the current position, so it's unneeded - savePoint.shrinkRange(input) - if !savePoint.rangeIsEmpty { - savePoints.append(savePoint) + // Create a quantified save point for every part of the input matched up + // to the final position. + let rangeStart = currentPosition + var rangeEnd = currentPosition + currentPosition = next + while true { + guard let next = _doQuantifyMatch(payload) else { break } + rangeEnd = currentPosition + currentPosition = next } - return true + + savePoints.append(makeQuantifiedSavePoint(rangeStart.. 
Bool { assert(payload.quantKind == .eager && payload.minTrips == 1 - && payload.extraTrips == nil) - var savePoint = startQuantifierSavePoint( - isScalarSemantics: payload.isScalarSemantics - ) - while true { - let next = _doQuantifyMatch(payload) - guard let idx = next else { break } - currentPosition = idx - savePoint.updateRange(newEnd: currentPosition) - } + && payload.maxExtraTrips == nil) - if savePoint.rangeIsEmpty { + // Match at least once + guard let next = _doQuantifyMatch(payload) else { signalFailure() return false } - // The last save point has saved the current position, so it's unneeded - savePoint.shrinkRange(input) - if !savePoint.rangeIsEmpty { - savePoints.append(savePoint) - } + + // Run `a+` as `aa*` + currentPosition = next + _doRunEagerZeroOrMoreQuantify(payload) return true } /// Specialized quantify instruction interpreter for ? mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.minTrips == 0 - && payload.extraTrips == 1) + && payload.maxExtraTrips == 1) let next = _doQuantifyMatch(payload) guard let idx = next else { return true // matched zero times } if payload.quantKind != .possessive { // Save the zero match - let savePoint = makeSavePoint(currentPC + 1) - savePoints.append(savePoint) + savePoints.append(makeSavePoint(resumingAt: currentPC+1)) } currentPosition = idx return true diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 6ecc49df7..86365322b 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -291,7 +291,7 @@ extension Processor { _ bitset: DSLTree.CustomCharacterClass.AsciiBitset, isScalarSemantics: Bool ) -> Bool { - guard let next = input.matchBitset( + guard let next = input.matchASCIIBitset( bitset, at: currentPosition, limitedBy: end, @@ -335,15 +335,14 @@ extension Processor { ) let idx = savePoints.index(before: savePoints.endIndex) - // If we have 
a quantifier save point, move the next range position into pos - if !savePoints[idx].rangeIsEmpty { - savePoints[idx].takePositionFromRange(input) - } - // If we have a normal save point or an empty quantifier save point, remove it - if savePoints[idx].rangeIsEmpty { - (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure - } else { + + // If we have a quantifier save point, move the next range position into + // pos instead of removing it + if savePoints[idx].isQuantified { + savePoints[idx].takePositionFromQuantifiedRange(input) (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints[idx].destructure + } else { + (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) = savePoints.removeLast().destructure } assert(stackEnd.rawValue <= callStack.count) @@ -434,19 +433,19 @@ extension Processor { } case .save: let resumeAddr = payload.addr - let sp = makeSavePoint(resumeAddr) + let sp = makeSavePoint(resumingAt: resumeAddr) savePoints.append(sp) controller.step() case .saveAddress: let resumeAddr = payload.addr - let sp = makeSavePoint(resumeAddr, addressOnly: true) + let sp = makeAddressOnlySavePoint(resumingAt: resumeAddr) savePoints.append(sp) controller.step() case .splitSaving: let (nextPC, resumeAddr) = payload.pairedAddrAddr - let sp = makeSavePoint(resumeAddr) + let sp = makeSavePoint(resumingAt: resumeAddr) savePoints.append(sp) controller.pc = nextPC @@ -518,12 +517,13 @@ extension Processor { case .quantify: let quantPayload = payload.quantify let matched: Bool - switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.extraTrips) { + switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) { case (.reluctant, _, _): assertionFailure(".reluctant is not supported by .quantify") return case (.eager, 0, nil): - matched = runEagerZeroOrMoreQuantify(quantPayload) + runEagerZeroOrMoreQuantify(quantPayload) + matched = true case (.eager, 1, nil): matched = 
runEagerOneOrMoreQuantify(quantPayload) case (_, 0, 1): @@ -632,9 +632,7 @@ extension Processor { let (val, cap) = payload.pairedValueCapture let value = registers[val] let capNum = Int(asserting: cap.rawValue) - let sp = makeSavePoint(self.currentPC) - storedCaptures[capNum].registerValue( - value, overwriteInitial: sp) + storedCaptures[capNum].registerValue(value) controller.step() } } @@ -725,22 +723,53 @@ extension String { return idx } - func matchBitset( + func matchASCIIBitset( _ bitset: DSLTree.CustomCharacterClass.AsciiBitset, at pos: Index, limitedBy end: Index, isScalarSemantics: Bool ) -> Index? { - // TODO: extremely quick-check-able - // TODO: can be sped up with string internals - if isScalarSemantics { - guard pos < end else { return nil } - guard bitset.matches(unicodeScalars[pos]) else { return nil } - return unicodeScalars.index(after: pos) - } else { - guard let (char, next) = characterAndEnd(at: pos, limitedBy: end), - bitset.matches(char) else { return nil } - return next + + // FIXME: Inversion should be tracked and handled in only one place. + // That is, we should probably store it as a bit in the instruction, so that + // bitset matching and bitset inversion is bit-based rather than semantically + // inverting the notion of a match or not. As-is, we need to track both + // meanings in some code paths. 
+ let isInverted = bitset.isInverted + + // TODO: More fodder for refactoring `_quickASCIICharacter`, see the comment + // there + guard let (asciiByte, next, isCRLF) = _quickASCIICharacter( + at: pos, + limitedBy: end + ) else { + if isScalarSemantics { + guard pos < end else { return nil } + guard bitset.matches(unicodeScalars[pos]) else { return nil } + return unicodeScalars.index(after: pos) + } else { + guard let (char, next) = characterAndEnd(at: pos, limitedBy: end), + bitset.matches(char) else { return nil } + return next + } + } + + guard bitset.matches(asciiByte) else { + // FIXME: check inversion here after refactored out of bitset + return nil + } + + // CR-LF should only match `[\r]` in scalar semantic mode or if inverted + if isCRLF { + if isScalarSemantics { + return self.unicodeScalars.index(before: next) + } + if isInverted { + return next + } + return nil } + + return next } } diff --git a/Sources/_StringProcessing/Engine/Tracing.swift b/Sources/_StringProcessing/Engine/Tracing.swift index b0ce67555..90445d5ec 100644 --- a/Sources/_StringProcessing/Engine/Tracing.swift +++ b/Sources/_StringProcessing/Engine/Tracing.swift @@ -93,7 +93,7 @@ extension Instruction: CustomStringConvertible { return "\(opcode) \(imm) -> int[\(reg)]" case .quantify: let payload = payload.quantify - return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.extraTrips?.description ?? "unbounded" )" + return "\(opcode) \(payload.type) \(payload.minTrips) \(payload.maxExtraTrips?.description ?? "unbounded" )" case .save: let resumeAddr = payload.addr return "\(opcode) \(resumeAddr)" @@ -118,11 +118,12 @@ extension Processor.SavePoint { if let p = self.pos { posStr = "\(input.distance(from: input.startIndex, to: p))" } else { - if rangeIsEmpty { + if !isQuantified { posStr = "" } else { - let startStr = "\(input.distance(from: input.startIndex, to: rangeStart!))" - let endStr = "\(input.distance(from: input.startIndex, to: rangeEnd!))" + let range = quantifiedRange! 
+ let startStr = "\(input.distance(from: input.startIndex, to: range.lowerBound))" + let endStr = "\(input.distance(from: input.startIndex, to: range.upperBound))" posStr = "\(startStr)...\(endStr)" } } diff --git a/Sources/_StringProcessing/Utility/AsciiBitset.swift b/Sources/_StringProcessing/Utility/AsciiBitset.swift index e063447a0..2b0217cdc 100644 --- a/Sources/_StringProcessing/Utility/AsciiBitset.swift +++ b/Sources/_StringProcessing/Utility/AsciiBitset.swift @@ -1,3 +1,4 @@ +// TODO: Probably refactor out of DSLTree extension DSLTree.CustomCharacterClass { internal struct AsciiBitset { let isInverted: Bool @@ -49,7 +50,7 @@ extension DSLTree.CustomCharacterClass { } } - private func matches(_ val: UInt8) -> Bool { + private func _matchesWithoutInversionCheck(_ val: UInt8) -> Bool { if val < 64 { return (a >> val) & 1 == 1 } else { @@ -57,10 +58,15 @@ extension DSLTree.CustomCharacterClass { } } + internal func matches(_ byte: UInt8) -> Bool { + guard byte < 128 else { return isInverted } + return _matchesWithoutInversionCheck(byte) == !isInverted + } + internal func matches(_ char: Character) -> Bool { let matched: Bool if let val = char._singleScalarAsciiValue { - matched = matches(val) + matched = _matchesWithoutInversionCheck(val) } else { matched = false } @@ -75,7 +81,7 @@ extension DSLTree.CustomCharacterClass { let matched: Bool if scalar.isASCII { let val = UInt8(ascii: scalar) - matched = matches(val) + matched = _matchesWithoutInversionCheck(val) } else { matched = false } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 06b6ff1a3..5ef16e37c 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -1846,8 +1846,7 @@ extension RegexDSLTests { func testLabeledCaptures_labeledCapture() throws { guard #available(macOS 13, *) else { - XCTSkip("Fix only exists on macOS 13") - return + throw XCTSkip("Fix only exists on macOS 13") } // The 
output type of a regex with a labeled capture is dropped. let dslWithLabeledCapture = Regex { @@ -1886,8 +1885,7 @@ extension RegexDSLTests { func testLabeledCaptures_bothCapture() throws { guard #available(macOS 13, *) else { - XCTSkip("Fix only exists on macOS 13") - return + throw XCTSkip("Fix only exists on macOS 13") } // Only the output type of a regex with a labeled capture is dropped, // outputs of other regexes in the same DSL are concatenated. @@ -1913,8 +1911,7 @@ extension RegexDSLTests { func testLabeledCaptures_tooManyCapture() throws { guard #available(macOS 13, *) else { - XCTSkip("Fix only exists on macOS 13") - return + throw XCTSkip("Fix only exists on macOS 13") } // The output type of a regex with too many captures is dropped. // "Too many" means the left and right output types would add up to >= 10. diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 791c0850a..07d75d722 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2666,13 +2666,13 @@ extension RegexTests { } func testQuantifyOptimization() throws { - // test that the maximum values for minTrips and extraTrips are handled correctly + // test that the maximum values for minTrips and maxExtraTrips are handled correctly let maxStorable = Int(QuantifyPayload.maxStorableTrips) - let maxExtraTrips = "a{,\(maxStorable)}" - expectProgram(for: maxExtraTrips, contains: [.quantify]) - firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable), match: String(repeating: "a", count: maxStorable)) - firstMatchTest(maxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) - XCTAssertNil(try Regex(maxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) + let maxmaxExtraTrips = "a{,\(maxStorable)}" + expectProgram(for: maxmaxExtraTrips, contains: [.quantify]) + firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", 
count: maxStorable), match: String(repeating: "a", count: maxStorable)) + firstMatchTest(maxmaxExtraTrips, input: String(repeating: "a", count: maxStorable + 1), match: String(repeating: "a", count: maxStorable)) + XCTAssertNil(try Regex(maxmaxExtraTrips).wholeMatch(in: String(repeating: "a", count: maxStorable + 1))) let maxMinTrips = "a{\(maxStorable),}" expectProgram(for: maxMinTrips, contains: [.quantify])