diff --git a/Package.swift b/Package.swift index b30c402c4..5d45950db 100644 --- a/Package.swift +++ b/Package.swift @@ -8,6 +8,10 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([ "-define-availability", "-Xfrontend", "SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", + "-Xfrontend", + "-define-availability", + "-Xfrontend", + "SwiftStdlib 5.8:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", ]) /// Swift settings for building a private stdlib-like module that is to be used @@ -87,7 +91,7 @@ let package = Package( name: "RegexBuilderTests", dependencies: ["_StringProcessing", "RegexBuilder", "TestSupport"], swiftSettings: [ - .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]) + availabilityDefinition ]), .testTarget( name: "DocumentationTests", diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index 152aadd0c..680f3bd2f 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -508,3 +508,65 @@ extension Regex.Match { internal func makeFactory() -> _RegexFactory { _RegexFactory() } + +/// These are special `accumulate` methods that wrap one or both components in +/// a node that indicates that their output types shouldn't be included in +/// the resulting strongly-typed output type. This is required from a +/// `buildPartialBlock` call where a component's output type is either ignored +/// or not included in the resulting type. For example: +/// +/// static func buildPartialBlock( +/// accumulated: R0, next: R1 +/// ) -> Regex<(Substring, C1)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1) +/// +/// In this `buildPartialBlock` overload, `W0` isn't included in the +/// resulting output type, even though it can match any output type, including +/// a tuple. 
When `W0` matches a tuple type that doesn't match another overload +/// (because of arity or labels) we need this "ignoring" variant so that we +/// don't have a type mismatch when we ultimately cast the type-erased output +/// to the expected type. +@available(SwiftStdlib 5.7, *) +extension _RegexFactory { + /// Concatenates the `left` and `right` component, wrapping `right` to + /// indicate that its output type shouldn't be included in the resulting + /// strongly-typed output type. + @_alwaysEmitIntoClient + internal func accumulate( + _ left: some RegexComponent, + ignoringOutputTypeOf right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate(left, ignoreCapturesInTypedOutput(right)) + } + return accumulate(left, right) + } + + /// Concatenates the `left` and `right` component, wrapping `left` to + /// indicate that its output type shouldn't be included in the resulting + /// strongly-typed output type. + @_alwaysEmitIntoClient + internal func accumulate( + ignoringOutputTypeOf left: some RegexComponent, + _ right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate(ignoreCapturesInTypedOutput(left), right) + } + return accumulate(left, right) + } + + /// Concatenates the `left` and `right` component, wrapping both sides to + /// indicate that their output types shouldn't be included in the resulting + /// strongly-typed output type. 
+ @_alwaysEmitIntoClient + internal func accumulate( + ignoringOutputTypeOf left: some RegexComponent, + andAlso right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate( + ignoreCapturesInTypedOutput(left), ignoreCapturesInTypedOutput(right)) + } + return accumulate(left, right) + } +} diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index 0f19cd6b0..f11727521 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -2,7 +2,7 @@ // // This source file is part of the Swift.org open source project // -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Copyright (c) 2021-2023 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information @@ -20,7 +20,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -30,7 +30,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -40,7 +40,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -50,7 +50,7 @@ 
extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -60,7 +60,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -70,7 +70,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -80,7 +80,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -90,7 +90,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -100,7 +100,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, 
C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -110,7 +110,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -565,123 +565,112 @@ extension RegexComponentBuilder { } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex where R0.RegexOutput == W0 { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, andAlso: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0)> where R0.RegexOutput == (W0, C0) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1)> where R0.RegexOutput == (W0, C0, C1) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) 
extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2)> where R0.RegexOutput == (W0, C0, C1, C2) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3)> where R0.RegexOutput == (W0, C0, C1, C2, C3) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - return 
factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @@ -6884,7 +6873,3 @@ extension TryCapture { self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } - - - -// END AUTO-GENERATED CONTENT diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index a971dafd7..42673ccfd 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -132,7 +132,7 @@ struct VariadicsGenerator: ParsableCommand { // // 
This source file is part of the Swift.org open source project // - // Copyright (c) 2021-2022 Apple Inc. and the Swift project authors + // Copyright (c) 2021-2023 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information @@ -262,7 +262,20 @@ struct VariadicsGenerator: ParsableCommand { accumulated: R0, next: R1 ) -> \(regexTypeName)<\(matchType)> \(whereClause) { let factory = makeFactory() + + """) + if leftArity == 0 { + output(""" + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) + + """) + } else { + output(""" return factory.accumulate(accumulated, next) + + """) + } + output(""" } } @@ -274,7 +287,6 @@ struct VariadicsGenerator: ParsableCommand { output(""" \(defaultAvailableAttr) extension \(concatBuilderName) { - \(defaultAvailableAttr) @_alwaysEmitIntoClient public static func buildPartialBlock CaptureList { var builder = Self() - builder.captures.append(.init(optionalDepth: 0, .fake)) - builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false)) + builder.captures.append(.init(optionalDepth: 0, visibleInTypedOutput: true, .fake)) + builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false), visibleInTypedOutput: true) return builder.captures } } diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index e0a6c7465..15e052901 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -874,7 +874,7 @@ fileprivate extension Compiler.ByteCodeGen { switch node { case .concatenation(let ch): return ch.flatMap(flatten) - case .convertedRegexLiteral(let n, _): + case .convertedRegexLiteral(let n, _), .ignoreCapturesInTypedOutput(let n): return flatten(n) default: return [node] @@ -951,6 +951,9 @@ fileprivate extension Compiler.ByteCodeGen { case let .nonCapturingGroup(kind, child): try 
emitNoncapturingGroup(kind.ast, child) + case let .ignoreCapturesInTypedOutput(child): + try emitNode(child) + case .conditional: throw Unsupported("Conditionals") diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index b75d01392..696a85361 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -61,7 +61,7 @@ extension Sequence where Element == AnyRegexOutput.Element { // and traffic through existentials @available(SwiftStdlib 5.7, *) func existentialOutput(from input: String) -> Any { - let elements = map { + let elements = filter(\.representation.visibleInTypedOutput).map { $0.existentialOutputComponent(from: input) } return elements.count == 1 diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 3a2731b0a..705b354fb 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -42,7 +42,7 @@ extension DSLTree.Node { case .orderedChoice, .conditional, .concatenation, .capture, .nonCapturingGroup, .quantification, .trivia, .empty, - .absentFunction: return nil + .ignoreCapturesInTypedOutput, .absentFunction: return nil case .consumer: fatalError("FIXME: Is this where we handle them?") diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index bc3adf701..32d7a6204 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -14,7 +14,8 @@ extension CaptureList { optionalDepth: cap.optionalDepth, content: meStored.deconstructed, name: cap.name, - referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key + referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key, + visibleInTypedOutput: cap.visibleInTypedOutput ) result.append(element) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift 
b/Sources/_StringProcessing/PrintAsPattern.swift index 953df6882..8b456f37d 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -131,6 +131,9 @@ extension PrettyPrinter { printer.printAsPattern(convertedFromAST: child) } + case let .ignoreCapturesInTypedOutput(child): + printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel) + case .conditional: print("/* TODO: conditional */") diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index fd292ed1b..243c1ba01 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -359,6 +359,10 @@ extension AnyRegexOutput { /// The capture reference this element refers to. var referenceID: ReferenceID? = nil + + /// A Boolean value indicating whether this capture should be included in + /// the typed output. + var visibleInTypedOutput: Bool } internal init(input: String, elements: [ElementRepresentation]) { diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 0a0831706..93e86c607 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -42,6 +42,9 @@ extension DSLTree { /// Matches a noncapturing subpattern. case nonCapturingGroup(_AST.GroupKind, Node) + /// Marks all captures in a subpattern as ignored in strongly-typed output. + case ignoreCapturesInTypedOutput(Node) + // TODO: Consider splitting off grouped conditions, or have // our own kind @@ -340,6 +343,27 @@ typealias _CharacterPredicateInterface = ( */ extension DSLTree.Node { + /// Indicates whether this node has at least one child node (among other + /// associated values). 
+ var hasChildNodes: Bool { + switch self { + case .trivia, .empty, .quotedLiteral, + .consumer, .matcher, .characterPredicate, + .customCharacterClass, .atom: + return false + + case .orderedChoice(let c), .concatenation(let c): + return !c.isEmpty + + case .convertedRegexLiteral, .capture, .nonCapturingGroup, + .quantification, .ignoreCapturesInTypedOutput, .conditional: + return true + + case .absentFunction(let abs): + return !abs.ast.children.isEmpty + } + } + @_spi(RegexBuilder) public var children: [DSLTree.Node] { switch self { @@ -354,6 +378,7 @@ extension DSLTree.Node { case let .capture(_, _, n, _): return [n] case let .nonCapturingGroup(_, n): return [n] case let .quantification(_, _, n): return [n] + case let .ignoreCapturesInTypedOutput(n): return [n] case let .conditional(_, t, f): return [t,f] @@ -403,11 +428,13 @@ extension DSLTree { } extension DSLTree { + /// Indicates whether this DSLTree contains any capture groups. var hasCapture: Bool { root.hasCapture } } extension DSLTree.Node { + /// Indicates whether this DSLTree node contains any capture groups. 
var hasCapture: Bool { switch self { case .capture: @@ -572,52 +599,55 @@ struct CaptureTransform: Hashable, CustomStringConvertible { extension CaptureList.Builder { mutating func addCaptures( - of node: DSLTree.Node, optionalNesting nesting: OptionalNesting + of node: DSLTree.Node, optionalNesting nesting: OptionalNesting, visibleInTypedOutput: Bool ) { switch node { case let .orderedChoice(children): for child in children { - addCaptures(of: child, optionalNesting: nesting.addingOptional) + addCaptures(of: child, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) } case let .concatenation(children): for child in children { - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) } case let .capture(name, _, child, transform): captures.append(.init( name: name, type: transform?.resultType ?? child.wholeMatchType, - optionalDepth: nesting.depth, .fake)) - addCaptures(of: child, optionalNesting: nesting) + optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, .fake)) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case let .nonCapturingGroup(kind, child): assert(!kind.ast.isCapturing) - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) + + case let .ignoreCapturesInTypedOutput(child): + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: false) case let .conditional(cond, trueBranch, falseBranch): switch cond.ast { case .group(let g): - addCaptures(of: .group(g), optionalNesting: nesting) + addCaptures(of: .group(g), optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) default: break } - addCaptures(of: trueBranch, optionalNesting: nesting.addingOptional) - addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional) + addCaptures(of: 
trueBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) + addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) case let .quantification(amount, _, child): var optNesting = nesting if amount.ast.bounds.atLeast == 0 { optNesting = optNesting.addingOptional } - addCaptures(of: child, optionalNesting: optNesting) + addCaptures(of: child, optionalNesting: optNesting, visibleInTypedOutput: visibleInTypedOutput) case let .absentFunction(abs): switch abs.ast.kind { case .expression(_, _, let child): - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case .clearer, .repeater, .stopper: break } @@ -625,7 +655,7 @@ extension CaptureList.Builder { case let .convertedRegexLiteral(n, _): // We disable nesting for converted AST trees, as literals do not nest // captures. This includes literals nested in a DSL. - return addCaptures(of: n, optionalNesting: nesting.disablingNesting) + return addCaptures(of: n, optionalNesting: nesting.disablingNesting, visibleInTypedOutput: visibleInTypedOutput) case .matcher: break @@ -639,8 +669,8 @@ extension CaptureList.Builder { static func build(_ dsl: DSLTree) -> CaptureList { var builder = Self() builder.captures.append( - .init(type: dsl.root.wholeMatchType, optionalDepth: 0, .fake)) - builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true)) + .init(type: dsl.root.wholeMatchType, optionalDepth: 0, visibleInTypedOutput: true, .fake)) + builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true), visibleInTypedOutput: true) return builder.captures } } @@ -650,7 +680,7 @@ extension DSLTree.Node { /// output but forwarding its only child's output. 
var isOutputForwarding: Bool { switch self { - case .nonCapturingGroup: + case .nonCapturingGroup, .ignoreCapturesInTypedOutput: return true case .orderedChoice, .concatenation, .capture, .conditional, .quantification, .customCharacterClass, .atom, @@ -710,6 +740,7 @@ extension DSLTree { case let .capture(_, _, n, _): return [_Tree(n)] case let .nonCapturingGroup(_, n): return [_Tree(n)] case let .quantification(_, _, n): return [_Tree(n)] + case let .ignoreCapturesInTypedOutput(n): return [_Tree(n)] case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index e0df906fa..584772921 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -20,6 +20,16 @@ public struct _RegexFactory { // Hide is behind an SPI that only RegexBuilder can use. @_spi(RegexBuilder) public init() {} + + @available(SwiftStdlib 5.8, *) + public func ignoreCapturesInTypedOutput( + _ child: some RegexComponent + ) -> Regex { + // Don't wrap `child` again if it's a leaf node. + child.regex.root.hasChildNodes + ? .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) + : .init(node: child.regex.root) + } @available(SwiftStdlib 5.7, *) public func accumulate( diff --git a/Tests/RegexBuilderTests/AlgorithmsTests.swift b/Tests/RegexBuilderTests/AlgorithmsTests.swift index dcaddd9d7..7d24e30af 100644 --- a/Tests/RegexBuilderTests/AlgorithmsTests.swift +++ b/Tests/RegexBuilderTests/AlgorithmsTests.swift @@ -13,6 +13,7 @@ import XCTest import _StringProcessing import RegexBuilder +@available(SwiftStdlib 5.7, *) class RegexConsumerTests: XCTestCase { func testMatches() { let regex = Capture(OneOrMore(.digit)) { 2 * Int($0)! 
} @@ -105,6 +106,7 @@ class RegexConsumerTests: XCTestCase { } } +@available(SwiftStdlib 5.7, *) class AlgorithmsResultBuilderTests: XCTestCase { enum MatchAlgo { case whole diff --git a/Tests/RegexBuilderTests/AnyRegexOutputTests.swift b/Tests/RegexBuilderTests/AnyRegexOutputTests.swift index e6c3214b9..165d1d411 100644 --- a/Tests/RegexBuilderTests/AnyRegexOutputTests.swift +++ b/Tests/RegexBuilderTests/AnyRegexOutputTests.swift @@ -5,6 +5,7 @@ import RegexBuilder private let enablePrinting = false +@available(SwiftStdlib 5.7, *) extension RegexDSLTests { func testContrivedAROExample() { diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index d34b5689f..85186b684 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -14,10 +14,12 @@ import _StringProcessing @testable import RegexBuilder // A nibbler processes a single character from a string +@available(SwiftStdlib 5.7, *) private protocol Nibbler: CustomConsumingRegexComponent { func nibble(_: Character) -> RegexOutput? } +@available(SwiftStdlib 5.7, *) extension Nibbler { // Default implementation, just feed the character in func consuming( @@ -34,6 +36,7 @@ extension Nibbler { // A number nibbler +@available(SwiftStdlib 5.7, *) private struct Numbler: Nibbler { typealias RegexOutput = Int func nibble(_ c: Character) -> Int? { @@ -42,6 +45,7 @@ private struct Numbler: Nibbler { } // An ASCII value nibbler +@available(SwiftStdlib 5.7, *) private struct Asciibbler: Nibbler { typealias RegexOutput = UInt8 func nibble(_ c: Character) -> UInt8? 
{ @@ -49,6 +53,7 @@ private struct Asciibbler: Nibbler { } } +@available(SwiftStdlib 5.7, *) private struct IntParser: CustomConsumingRegexComponent { struct ParseError: Error, Hashable {} typealias RegexOutput = Int @@ -71,6 +76,7 @@ private struct IntParser: CustomConsumingRegexComponent { } } +@available(SwiftStdlib 5.7, *) private struct CurrencyParser: CustomConsumingRegexComponent { enum Currency: String, Hashable { case usd = "USD" @@ -117,9 +123,12 @@ enum MatchCall { case firstMatch } -func customTest( +@available(SwiftStdlib 5.7, *) +fileprivate func customTest( _ regex: Regex, - _ tests: (input: String, call: MatchCall, match: Match?)... + _ tests: (input: String, call: MatchCall, match: Match?)..., + file: StaticString = #file, + line: UInt = #line ) { for (input, call, match) in tests { let result: Match? @@ -129,7 +138,40 @@ func customTest( case .firstMatch: result = input.firstMatch(of: regex)?.output } - XCTAssertEqual(result, match) + XCTAssertEqual(result, match, file: file, line: line) + } +} + +@available(SwiftStdlib 5.7, *) +fileprivate func customTest( + _ regex: some RegexComponent, + _ isEquivalent: (Match, Match) -> Bool, + _ tests: (input: String, call: MatchCall, match: Match?)..., + file: StaticString = #file, + line: UInt = #line +) { + for (input, call, match) in tests { + let result: Match? 
+ switch call { + case .match: + result = input.wholeMatch(of: regex)?.output + case .firstMatch: + result = input.firstMatch(of: regex)?.output + } + switch (result, match) { + case let (result?, match?): + XCTAssert( + isEquivalent(result, match), + "'\(result)' isn't equal to '\(match)'.", + file: file, line: line) + case (nil, nil): + // Success + break + case (nil, _): + XCTFail("No match when expected", file: file, line: line) + case (_, nil): + XCTFail("Unexpected match", file: file, line: line) + } } } @@ -178,6 +220,7 @@ extension Concat: BidirectionalCollection { } } +@available(SwiftStdlib 5.7, *) class CustomRegexComponentTests: XCTestCase { // TODO: Refactor below into more exhaustive, declarative // tests. @@ -211,39 +254,91 @@ class CustomRegexComponentTests: XCTestCase { ("55z", .match, nil), ("55z", .firstMatch, 5)) - // TODO: Convert below tests to better infra. Right now - // it's hard because `Match` is constrained to be - // `Equatable` which tuples cannot be. +// customTest( +// Regex { +// #/(?\D+)/# +// Optionally("~") +// }, +// ("ab123c", .firstMatch, "ab"), +// ("abc", .firstMatch, "abc"), +// ("123", .firstMatch, nil), +// ("a55z", .match, nil), +// ("a55z", .firstMatch, "a")) + + customTest( + Regex<(Substring, Substring, Int)> { + #/(\D+)/# + Capture(Numbler()) + }, + ==, + ("ab123c", .firstMatch, ("ab1", "ab", 1)), + ("abc", .firstMatch, nil), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, ("a5", "a", 5))) - let regex3 = Regex { - Capture { + customTest( + Regex<(Substring, prefix: Substring)> { + #/(?\D+)/# + }, + ==, + ("ab123c", .firstMatch, ("ab", "ab")), + ("abc", .firstMatch, ("abc", "abc")), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, ("a", "a"))) + +// customTest( +// Regex<(Substring, Int)> { +// #/(?\D+)/# +// Capture(Numbler()) +// }, +// ==, +// ("ab123c", .firstMatch, ("ab1", 1)), +// ("abc", .firstMatch, nil), +// ("123", .firstMatch, nil), +// ("a55z", 
.match, nil), +// ("a55z", .firstMatch, ("a5", 5))) + +// customTest( +// Regex<(Substring, Int, Substring)> { +// #/(?\D+)/# +// Regex { +// Capture(Numbler()) +// Capture(OneOrMore(.word)) +// } +// }, +// ==, +// ("ab123c", .firstMatch, ("ab123c", 1, "23c")), +// ("abc", .firstMatch, nil), +// ("123", .firstMatch, nil), +// ("a55z", .match, ("a55z", 5, "5z")), +// ("a55z", .firstMatch, ("a55z", 5, "5z"))) + + customTest( + Regex<(Substring, Substring)> { + Capture { + OneOrMore { + Numbler() + } + } + }, + ==, + ("abc123", .firstMatch, ("123", "123")), + ("abc123", .match, nil), + ("abc", .firstMatch, nil)) + + customTest( + Regex<(Substring, Int)> { OneOrMore { - Numbler() + Capture { Numbler() } } - } - } - - let str = "ab123c" - let res3 = try XCTUnwrap(str.firstMatch(of: regex3)) - - let expectedSubstring = str.dropFirst(2).prefix(3) - XCTAssertEqual(res3.range, expectedSubstring.startIndex.. TransactionKind? in TransactionKind(rawValue: String(s)) } @@ -322,7 +323,7 @@ extension RegexDSLTests { Repeat(.digit, count: 2) Repeat(.digit, count: 2) Repeat(.digit, count: 4) - } transform: { (s: Substring) in + } transform: { (s: Substring) -> Date? in Date(mmddyyyy: String(s)) } @@ -345,7 +346,7 @@ extension RegexDSLTests { OneOrMore(.digit) "." Repeat(.digit, count: 2) - } transform: { (s: Substring) in + } transform: { (s: Substring) -> Double? 
in Double(s) } } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 8b7611536..0dd050357 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -14,6 +14,7 @@ import _StringProcessing import RegexBuilder import TestSupport +@available(SwiftStdlib 5.7, *) class RegexDSLTests: XCTestCase { func _testDSLCaptures( _ tests: (input: String, expectedCaptures: MatchType?)..., @@ -52,31 +53,31 @@ class RegexDSLTests: XCTestCase { file: file, line: line) } } - + func testSimpleStrings() throws { let regex = Regex { "a" Capture(Character("b")) // Character - TryCapture("1") { Int($0) } // Int + TryCapture { "1" } transform: { Int($0) } // Int } // Assert the inferred capture type. let _: (Substring, Substring, Int).Type = type(of: regex).RegexOutput.self let maybeMatch = "ab1".wholeMatch(of: regex) let match = try XCTUnwrap(maybeMatch) XCTAssertTrue(match.output == ("ab1", "b", 1)) - + let substring = "ab1"[...] let substringMatch = try XCTUnwrap(substring.wholeMatch(of: regex)) XCTAssertTrue(match.output == substringMatch.output) } - + let allNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n\u{85}\u{2028}\u{2029}" let asciiNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n" - + func testCharacterClasses() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + try _testDSLCaptures( ("a c", ("a c", " ", "c")), matchType: (Substring, Substring, Substring).self, ==) @@ -94,7 +95,7 @@ class RegexDSLTests: XCTestCase { OneOrMore { CharacterClass("a"..."z", .digit) } - + // Second group OneOrMore { ChoiceOf { @@ -103,7 +104,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("abc1def2", ("abc1def2", "abc1")), matchType: (Substring, Substring).self, ==) @@ -112,12 +113,12 @@ class RegexDSLTests: XCTestCase { OneOrMore(.digit.inverted) ("a"..."z").inverted } - + OneOrMore { CharacterClass.whitespace.inverted } } - + // `.newlineSequence` and `.verticalWhitespace` match the same set of // newlines in grapheme semantic mode, and scalar mode when applied with // OneOrMore. @@ -146,7 +147,7 @@ class RegexDSLTests: XCTestCase { } }.matchingSemantics(mode) } - + // Try with ASCII-only whitespace. try _testDSLCaptures( ("\n", ("\n", "\n")), @@ -173,7 +174,7 @@ class RegexDSLTests: XCTestCase { } } } - + // `.newlineSequence` in scalar mode may match a single `\r\n`. // `.verticalWhitespace` may not. for asciiOnly in [true, false] { @@ -224,7 +225,7 @@ class RegexDSLTests: XCTestCase { }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly) } } - + // Make sure horizontal whitespace does not match newlines or other // vertical whitespace. try _testDSLCaptures( @@ -237,7 +238,7 @@ class RegexDSLTests: XCTestCase { { OneOrMore(.horizontalWhitespace) } - + // Horizontal whitespace in ASCII mode. try _testDSLCaptures( (" \u{9} \t ", " \u{9} \t "), @@ -249,11 +250,11 @@ class RegexDSLTests: XCTestCase { }.asciiOnlyWhitespace() } } - + func testCharacterClassOperations() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + try _testDSLCaptures( ("bcdefn1a", "bcdefn1a"), ("nbcdef1a", nil), // fails symmetric difference lookahead @@ -265,15 +266,15 @@ class RegexDSLTests: XCTestCase { let disallowedChars = CharacterClass.hexDigit .symmetricDifference("a"..."z") NegativeLookahead(disallowedChars) // No: 0-9 + g-z - + OneOrMore(("b"..."g").union("d"..."n")) // b-n CharacterClass.digit.subtracting("3"..."9") // 1, 2, non-ascii digits - + CharacterClass.hexDigit.intersection("a"..."z") // a-f } } - + func testAny() throws { // .any matches newlines regardless of matching options. for dotMatchesNewline in [true, false] { @@ -286,7 +287,7 @@ class RegexDSLTests: XCTestCase { }.dotMatchesNewlines(dotMatchesNewline) } } - + // `.anyGraphemeCluster` is the same as `.any` in grapheme mode. for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { try _testDSLCaptures( @@ -301,7 +302,7 @@ class RegexDSLTests: XCTestCase { One(.anyGraphemeCluster) }.matchingSemantics(mode) } - + // Like `.any` it also always matches newlines. for dotMatchesNewline in [true, false] { try _testDSLCaptures( @@ -315,7 +316,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testAnyNonNewline() throws { // `.anyNonNewline` is `.` without single-line mode. 
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { @@ -332,7 +333,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.anyNonNewline) }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline) } - + try _testDSLCaptures( ("abcdef", nil), ("abcdef\n", nil), @@ -345,7 +346,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.anyNonNewline.inverted) }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline) } - + try _testDSLCaptures( ("abc", "abc"), ("abcd", nil), @@ -360,7 +361,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("\r\n", "\r\n"), matchType: Substring.self, ==) { CharacterClass.anyNonNewline.inverted @@ -372,12 +373,12 @@ class RegexDSLTests: XCTestCase { }.matchingSemantics(.unicodeScalar) } } - + func testMatchResultDotZeroWithoutCapture() throws { let match = try XCTUnwrap("aaa".wholeMatch { OneOrMore { "a" } }) XCTAssertEqual(match.0, "aaa") } - + func testAlternation() throws { do { let regex = ChoiceOf { @@ -446,7 +447,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil("aab".wholeMatch(of: regex)?.output) } } - + func testCombinators() throws { try _testDSLCaptures( ("aaaabccccdddkj", ("aaaabccccdddkj", "b", "cccc", "d", "k", nil, "j")), @@ -497,7 +498,7 @@ class RegexDSLTests: XCTestCase { .ignoresCase(true) .ignoresCase(false) } - + // An option on an outer component doesn't override an option set on an // inner component. try _testDSLCaptures( @@ -518,7 +519,7 @@ class RegexDSLTests: XCTestCase { } .ignoresCase(false) } - + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can't", "won't")), @@ -538,7 +539,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.any, .reluctant) "stop" } - + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can", "won")), @@ -599,7 +600,7 @@ class RegexDSLTests: XCTestCase { func testQuantificationBehavior() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + // Eager by default try _testDSLCaptures( ("abc1def2", ("abc1def2", "2")), @@ -609,7 +610,7 @@ class RegexDSLTests: XCTestCase { Capture(.digit) ZeroOrMore(.any) } - + // Explicitly reluctant try _testDSLCaptures( ("abc1def2", ("abc1def2", "1")), @@ -700,7 +701,7 @@ class RegexDSLTests: XCTestCase { OneOrMore("a") }.repetitionBehavior(.possessive) } - + try _testDSLCaptures( ("abc1def2", "abc1def2"), matchType: Substring.self, ==) @@ -712,7 +713,7 @@ class RegexDSLTests: XCTestCase { CharacterClass.digit } } - + try _testDSLCaptures( ("abcdef2", ("abcdef2", "f")), ("2", ("2", nil)), @@ -726,7 +727,7 @@ class RegexDSLTests: XCTestCase { CharacterClass.digit } } - + try _testDSLCaptures( ("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"), ("aaabbbcccccdddeeefff", "aaabbbcccccdddeeefff"), @@ -748,7 +749,7 @@ class RegexDSLTests: XCTestCase { Repeat(2...) { "e" } Repeat(0...) { "f" } } - + try _testDSLCaptures( ("", nil), ("a", nil), @@ -758,7 +759,7 @@ class RegexDSLTests: XCTestCase { { Repeat(2...) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -768,7 +769,7 @@ class RegexDSLTests: XCTestCase { { Repeat(...2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -778,7 +779,7 @@ class RegexDSLTests: XCTestCase { { Repeat(..<2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -787,7 +788,7 @@ class RegexDSLTests: XCTestCase { { Repeat(...0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -796,7 +797,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ... 0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -805,7 +806,7 @@ class RegexDSLTests: XCTestCase { { Repeat(count: 0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -814,7 +815,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ... 1) { "a" } } - + try _testDSLCaptures( ("", nil), ("a", "a"), @@ -824,7 +825,7 @@ class RegexDSLTests: XCTestCase { { Repeat(1 ... 
2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -833,7 +834,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ..< 1) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -842,7 +843,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ..< 2) { "a" } } - + try _testDSLCaptures( ("", nil), ("a", "a"), @@ -852,7 +853,7 @@ class RegexDSLTests: XCTestCase { { Repeat(1 ..< 3) { "a" } } - + let octoDecimalRegex: Regex<(Substring, Int?)> = Regex { let charClass = CharacterClass(.digit, "a"..."h")//.ignoringCase() Capture { @@ -907,7 +908,7 @@ class RegexDSLTests: XCTestCase { UnicodeScalar("e") Anchor.textSegmentBoundary } - + try _testDSLCaptures( ("aaaaa1", "aaaaa1"), ("aaaaa2", nil), @@ -934,7 +935,7 @@ class RegexDSLTests: XCTestCase { Anchor.endOfSubject }.anchorsMatchLineEndings() } - + try _testDSLCaptures( ("\naaa", "\naaa"), ("aaa\n", "aaa\n"), @@ -949,7 +950,7 @@ class RegexDSLTests: XCTestCase { Optionally { "\n" } } } - + // startOfLine/endOfLine apply regardless of mode. 
for matchLineEndings in [true, false] { for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { @@ -958,41 +959,41 @@ class RegexDSLTests: XCTestCase { Repeat("a", count: 3) Anchor.endOfLine }.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode) - + XCTAssertNotNil(try r.firstMatch(in: "\naaa")) XCTAssertNotNil(try r.firstMatch(in: "aaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\naaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n")) XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n")) - + XCTAssertNil(try r.firstMatch(in: "\nbaaa\n")) XCTAssertNil(try r.firstMatch(in: "\naaab\n")) } } } - + func testNestedGroups() throws { return; - + // TODO: clarify what the nesting story is - + /* - try _testDSLCaptures( - ("aaaabccccddd", ("aaaabccccddd", [("b", "cccc", ["d", "d", "d"])])), - matchType: (Substring, [(Substring, Substring, [Substring])]).self, ==) - { - "a".+ - OneOrMore { - Capture(OneOrMore("b")) - Capture(ZeroOrMore("c")) - Capture("d").* - "e".? - } - } + try _testDSLCaptures( + ("aaaabccccddd", ("aaaabccccddd", [("b", "cccc", ["d", "d", "d"])])), + matchType: (Substring, [(Substring, Substring, [Substring])]).self, ==) + { + "a".+ + OneOrMore { + Capture(OneOrMore("b")) + Capture(ZeroOrMore("c")) + Capture("d").* + "e".? 
+ } + } */ } - + func testCaptureTransform() throws { try _testDSLCaptures( ("aaaa1", ("aaaa1", "aaa")), @@ -1015,7 +1016,7 @@ class RegexDSLTests: XCTestCase { One(.digit) } } - + func testCapturelessQuantification() throws { // This test is to make sure that a captureless quantification, when used // straight out of the quantifier (without being wrapped in a builder), is @@ -1028,7 +1029,7 @@ class RegexDSLTests: XCTestCase { let match = try XCTUnwrap(input.wholeMatch(of: regex)?.output) XCTAssertTrue(match == input) } - + func testQuantificationWithTransformedCapture() throws { // This test is to make sure transformed capture type information is // correctly propagated from the DSL into the bytecode and that the engine @@ -1037,7 +1038,7 @@ class RegexDSLTests: XCTestCase { enum Word: Int32 { case apple case orange - + init?(_ string: Substring) { switch string { case "apple": self = .apple @@ -1062,7 +1063,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testNestedCaptureTypes() throws { let regex1 = Regex { OneOrMore("a") @@ -1072,8 +1073,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Substring).Type - = type(of: regex1).RegexOutput.self - + = type(of: regex1).RegexOutput.self + let regex2 = Regex { OneOrMore("a") Capture { @@ -1084,8 +1085,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Int?).Type - = type(of: regex2).RegexOutput.self - + = type(of: regex2).RegexOutput.self + let regex3 = Regex { OneOrMore("a") Capture { @@ -1097,8 +1098,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Int, Double?).Type - = type(of: regex3).RegexOutput.self - + = type(of: regex3).RegexOutput.self + let regex4 = Regex { OneOrMore("a") Capture { @@ -1112,50 +1113,50 @@ class RegexDSLTests: XCTestCase { } let _: ( Substring, Substring, Substring, Substring, Substring?).Type - = type(of: regex4).RegexOutput.self + = type(of: regex4).RegexOutput.self } - + func testUnicodeScalarPostProcessing() throws 
{ let spaces = Regex { ZeroOrMore { One(.whitespace) } } - + let unicodeScalar = Regex { OneOrMore { One(.hexDigit) } spaces } - + let unicodeData = Regex { unicodeScalar Optionally { ".." unicodeScalar } - + ";" spaces - + Capture { OneOrMore(.word) } - + ZeroOrMore(.any) } - + // Assert the inferred capture type. let _: (Substring, Substring).Type = type(of: unicodeData).RegexOutput.self - + let unicodeLine = - "1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP" + "1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP" let match = try XCTUnwrap(unicodeLine.wholeMatch(of: unicodeData)) XCTAssertEqual(match.0, Substring(unicodeLine)) XCTAssertEqual(match.1, "Control") } - + func testGraphemeBreakData() throws { let line = """ A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -1191,7 +1192,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(upper, Unicode.Scalar(0xA6F1)) XCTAssertEqual(propertyString, "Extend") } - + let regexWithTryCapture = Regex { TryCapture { OneOrMore(.hexDigit) @@ -1226,10 +1227,10 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(upper, Unicode.Scalar(0xA6F1)) XCTAssertEqual(propertyString, "Extend") } - + do { let regexLiteral = try Regex( - #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#, + #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#, as: (Substring, Substring, Substring?, Substring).self) let maybeMatchResult = line.wholeMatch(of: regexLiteral) let matchResult = try XCTUnwrap(maybeMatchResult) @@ -1240,7 +1241,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(propertyString, "Extend") } } - + func testBackreference() throws { try _testDSLCaptures( ("abc#41#42abcabcabc", ("abc#41#42abcabcabc", "abc", 42, "abc", nil)), @@ -1266,7 +1267,7 @@ class RegexDSLTests: XCTestCase { Capture(a) } } - + // Match result referencing a `Reference`. 
do { let a = Reference(Substring.self) @@ -1294,7 +1295,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(result[a], "abc") XCTAssertEqual(result[b], 42) } - + do { let key = Reference(Substring.self) let value = Reference(Int.self) @@ -1312,15 +1313,15 @@ class RegexDSLTests: XCTestCase { } transform: { Int($0)! } } } - + let result1 = try XCTUnwrap("age:123".wholeMatch(of: regex)) XCTAssertEqual(result1[key], "age") XCTAssertEqual(result1[value], 123) - + let result2 = try XCTUnwrap(":567".wholeMatch(of: regex)) XCTAssertEqual(result2[key], "") XCTAssertEqual(result2[value], 567) - + let result3 = try XCTUnwrap("status:".wholeMatch(of: regex)) XCTAssertEqual(result3[key], "status") // Traps: @@ -1351,7 +1352,7 @@ class RegexDSLTests: XCTestCase { } } } - + // Post-hoc captured reference w/ attempted match before capture // #"(?:\w\1|(\w):)+"# // @@ -1400,7 +1401,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testScalarMatching() throws { // RegexBuilder provides a RegexComponent conformance for UnicodeScalar. In // grapheme cluster mode, it should only match entire graphemes. 
It may @@ -1409,7 +1410,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil("a\u{301}".firstMatch(of: "a" as UnicodeScalar)) XCTAssertNotNil("a\u{301}".firstMatch( of: ("a" as UnicodeScalar).regex.matchingSemantics(.unicodeScalar))) - + let r1 = Regex { "a" as UnicodeScalar } @@ -1417,7 +1418,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r1.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}") ) - + let r2 = Regex { CharacterClass.anyOf(["a" as UnicodeScalar, "๐Ÿ‘"]) } @@ -1425,7 +1426,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r2.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}") ) - + let r3 = Regex { "๐Ÿ‘จ" as UnicodeScalar "\u{200D}" as UnicodeScalar @@ -1439,7 +1440,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r3.wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).firstMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) - + let r4 = Regex { "รฉ" as UnicodeScalar } XCTAssertNotNil( try r4.firstMatch(in: "e\u{301}") @@ -1447,28 +1448,28 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r4.firstMatch(in: "รฉ") ) - + let r5 = Regex { "e" "\u{301}" as UnicodeScalar } XCTAssertNotNil(try r5.firstMatch(in: "e\u{301}")) XCTAssertNotNil(try r5.firstMatch(in: "รฉ")) - + let r6 = Regex { "abcde" "\u{301}" } XCTAssertNotNil(try r6.firstMatch(in: "abcde\u{301}")) XCTAssertNotNil(try r6.firstMatch(in: "abcdรฉ")) - + let r7 = Regex { "e" as Character "\u{301}" as Character } XCTAssertNotNil(try r7.firstMatch(in: "e\u{301}")) XCTAssertNotNil(try r7.firstMatch(in: "รฉ")) - + // You can't match a partial grapheme in grapheme semantic mode. 
let r8 = Regex { "๐Ÿ‘จ" as UnicodeScalar @@ -1481,7 +1482,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil(try r8.wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r8.matchingSemantics(.unicodeScalar).firstMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNil(try r8.matchingSemantics(.unicodeScalar).wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) - + // Scalar coalescing occurs across nested concatenations and literals. let r9 = Regex { Regex { @@ -1503,7 +1504,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r9.wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).firstMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) - + let r10 = Regex { "๐Ÿ‘จ" as UnicodeScalar try! Regex(#"\u{200D 1F468 200D 1F467}"#) @@ -1515,7 +1516,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).firstMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).wholeMatch(in: "๐Ÿ‘จโ€๐Ÿ‘จโ€๐Ÿ‘งโ€๐Ÿ‘ฆ")) } - + struct SemanticVersion: Equatable { var major: Int var minor: Int @@ -1542,11 +1543,11 @@ class RegexDSLTests: XCTestCase { Capture(OneOrMore(.word)) } } - + guard let match = input[index..) throws -> (upperBound: String.Index, output: Void)? { print("Matching '\(label)'", to: &Self.traceOutput) print(input, to: &Self.traceOutput) @@ -1611,7 +1612,7 @@ class RegexDSLTests: XCTestCase { """) } - + func testRegexComponentBuilderResultType() { // Test that the user can declare a closure or computed property marked with // `@RegexComponentBuilder` with `Regex` as the result type. @@ -1654,7 +1655,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(try replace("{bar}"), "foo") } - + func testOptionalNesting() throws { try _testDSLCaptures( ("a", ("a", nil)), @@ -1665,7 +1666,7 @@ class RegexDSLTests: XCTestCase { { try! 
Regex("(?:a|(b)*)?", as: (Substring, Substring?).self) } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1677,7 +1678,7 @@ class RegexDSLTests: XCTestCase { try! Regex("a|(b)*", as: (Substring, Substring?).self) } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1692,7 +1693,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1705,7 +1706,7 @@ class RegexDSLTests: XCTestCase { try! Regex("(b)*", as: (Substring, Substring?).self) } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1720,7 +1721,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1737,7 +1738,7 @@ class RegexDSLTests: XCTestCase { } } } - + let r = Regex { Optionally { Optionally { @@ -1757,6 +1758,139 @@ class RegexDSLTests: XCTestCase { } } +fileprivate let oneNumericField = "abc:123:def" +fileprivate let twoNumericFields = "abc:123:def:456:ghi" + +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithCapture = #/:(\d+):/# +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithLabeledCapture = #/:(?<number>\d+):/# +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithNonCapture = #/:(?:\d+):/# + +@available(SwiftStdlib 5.7, *) +extension RegexDSLTests { + func testLabeledCaptures_regularCapture() throws { + // The output type of a regex with unlabeled captures is concatenated. + let dslWithCapture = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) + + let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) + XCTAssertEqual(output.0, oneNumericField[...]) + XCTAssertEqual(output.1, "123") + } + + func testLabeledCaptures_labeledCapture() throws { + guard #available(macOS 13, *) else { + // `XCTSkip` is an `Error`; the test is only reported as skipped when it is thrown. + throw XCTSkip("Fix only exists on macOS 13") + } + // The output type of a regex with a labeled capture is dropped.
+ let dslWithLabeledCapture = Regex { + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithLabeledCapture).self == Regex<Substring>.self) + + let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) + XCTAssertEqual(match.output, oneNumericField[...]) + + // We can recover the ignored captures by converting to `AnyRegexOutput`. + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 2) + XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput["number"]?.substring, "123") + } + + func testLabeledCaptures_coalescingWithCapture() throws { + let coalescingWithCapture = Regex { + "e" as Character + #/\u{301}(\d*)/# + } + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "รฉ")) + + let coalescingWithLabeledCapture = Regex { + "e" as Character + #/\u{301}(?\d*)/# + } + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "รฉ")) + } + + func testLabeledCaptures_bothCapture() throws { + guard #available(macOS 13, *) else { + // `XCTSkip` is an `Error`; the test is only reported as skipped when it is thrown. + throw XCTSkip("Fix only exists on macOS 13") + } + // Only the output type of a regex with a labeled capture is dropped, + // outputs of other regexes in the same DSL are concatenated.
+ let dslWithBothCaptures = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) + + let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) + XCTAssertEqual(match.output.0, twoNumericFields[...]) + XCTAssertEqual(match.output.1, "123") + + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 3) + XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput[2].substring, "456") + } + + func testLabeledCaptures_tooManyCapture() throws { + guard #available(macOS 13, *) else { + // `XCTSkip` is an `Error`; the test is only reported as skipped when it is thrown. + throw XCTSkip("Fix only exists on macOS 13") + } + // The output type of a regex with too many captures is dropped. + // "Too many" means the left and right output types would add up to >= 10. + let alpha = "AAA:abcdefghijklm:123:456:" + let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# + let dslWithTooManyCaptures = Regex { + Capture(OneOrMore(.word)) + ":" + regexWithTooManyCaptures + ":" + TryCapture(OneOrMore(.word)) { Int($0) } + #/:(\d+):/# + } + XCTAssert(type(of: dslWithTooManyCaptures).self + == Regex<(Substring, Substring, Int, Substring)>.self) + + let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) + XCTAssertEqual(match.output.0, alpha[...]) + XCTAssertEqual(match.output.1, "AAA") + XCTAssertEqual(match.output.2, 123) + XCTAssertEqual(match.output.3, "456") + + // All capture groups are available through `AnyRegexOutput`.
+ let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 17) + XCTAssertEqual(anyOutput[0].substring, alpha[...]) + XCTAssertEqual(anyOutput[1].substring, "AAA") + for (offset, letter) in "abcdefghijklm".enumerated() { + XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) + } + XCTAssertEqual(anyOutput[15].substring, "123") + XCTAssertEqual(anyOutput[15].value as? Int, 123) + XCTAssertEqual(anyOutput[16].substring, "456") + } +} + extension Unicode.Scalar { // Convert a hexadecimal string to a scalar init?(hex: S) { diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 26093bc64..85aecd210 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -16,15 +16,15 @@ import XCTest extension CaptureList.Capture { static var cap: Self { - return Self(optionalDepth: 0, .fake) + return Self(optionalDepth: 0, visibleInTypedOutput: true, .fake) } static var opt: Self { - return Self(optionalDepth: 1, .fake) + return Self(optionalDepth: 1, visibleInTypedOutput: true, .fake) } static func named(_ name: String, opt: Int = 0) -> Self { - return Self(name: name, optionalDepth: opt, .fake) + return Self(name: name, optionalDepth: opt, visibleInTypedOutput: true, .fake) } } extension CaptureList {