From ec261f106bdf1e4692e23d9bd7a879648ec62e12 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 30 Jan 2023 13:35:38 -0600 Subject: [PATCH 1/9] Fix output type mismatch with RegexBuilder Some regex literals (and presumably other `Regex` instances) lose their output type information when used in a RegexBuilder closure due to the way the concatenating builder calls are overloaded. In particular, any output type with labeled tuples or where the sum of tuple components in the accumulated and new output types is greater than 10 will be ignored. Regex internals don't make this distinction, however, so there ends up being a mismatch between what a `Regex.Match` instance tries to produce and the output type of the outermost regex. For example, this code results in a crash, because `regex` is a `Regex<Substring>` but the match tries to produce a `(Substring, number: Substring)`: let regex = Regex { ZeroOrMore(.whitespace) /:(?<number>\d+):/ ZeroOrMore(.whitespace) } let match = try regex.wholeMatch(in: " :21: ") print(match!.output) To fix this, we add a new `ignoreCapturesInTypedOutput` DSLTree node to mark situations where the output type is discarded. This status is propagated through the capture list into the match's storage, which lets us produce the correct output type. Note that we can't just drop the capture groups when building the compiled program because (1) different parts of the regex might reference the capture group and (2) all capture groups are available if a developer converts the output to `AnyRegexOutput`. let anyOutput = AnyRegexOutput(match) // anyOutput[1] == "21" // anyOutput["number"] == Optional("21") Fixes #625. 
rdar://104823356 --- Package.swift | 4 + Sources/RegexBuilder/Variadics.swift | 68 +++++++++-- .../VariadicsGenerator.swift | 8 +- .../Regex/Parse/CaptureList.swift | 33 ++--- Sources/_StringProcessing/ByteCodeGen.swift | 5 +- Sources/_StringProcessing/Capture.swift | 2 +- .../_StringProcessing/ConsumerInterface.swift | 2 +- .../Engine/Structuralize.swift | 3 +- .../_StringProcessing/PrintAsPattern.swift | 3 + .../Regex/AnyRegexOutput.swift | 4 + Sources/_StringProcessing/Regex/DSLTree.swift | 61 +++++++--- .../Utility/RegexFactory.swift | 10 ++ .../RegexBuilderTests.swift | 113 ++++++++++++++++++ Tests/RegexTests/CaptureTests.swift | 6 +- 14 files changed, 272 insertions(+), 50 deletions(-) diff --git a/Package.swift b/Package.swift index b30c402c4..ba3c867de 100644 --- a/Package.swift +++ b/Package.swift @@ -8,6 +8,10 @@ let availabilityDefinition = PackageDescription.SwiftSetting.unsafeFlags([ "-define-availability", "-Xfrontend", "SwiftStdlib 5.7:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", + "-Xfrontend", + "-define-availability", + "-Xfrontend", + "SwiftStdlib 5.8:macOS 9999, iOS 9999, watchOS 9999, tvOS 9999", ]) /// Swift settings for building a private stdlib-like module that is to be used diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index 0f19cd6b0..cbd11e192 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -2,7 +2,7 @@ // // This source file is part of the Swift.org open source project // -// Copyright (c) 2021-2022 Apple Inc. and the Swift project authors +// Copyright (c) 2021-2023 Apple Inc. 
and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information @@ -571,7 +571,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex where R0.RegexOutput == W0 { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -582,7 +586,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0)> where R0.RegexOutput == (W0, C0) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -593,7 +601,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1)> where R0.RegexOutput == (W0, C0, C1) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -604,7 +616,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2)> where R0.RegexOutput == (W0, C0, C1, C2) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + 
} } } @available(SwiftStdlib 5.7, *) @@ -615,7 +631,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3)> where R0.RegexOutput == (W0, C0, C1, C2, C3) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -626,7 +646,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -637,7 +661,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -648,7 +676,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return 
factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -659,7 +691,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -670,7 +706,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } @available(SwiftStdlib 5.7, *) @@ -681,7 +721,11 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index a971dafd7..092b98da7 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -132,7 +132,7 @@ struct VariadicsGenerator: ParsableCommand { // // This source file 
is part of the Swift.org open source project // - // Copyright (c) 2021-2022 Apple Inc. and the Swift project authors + // Copyright (c) 2021-2023 Apple Inc. and the Swift project authors // Licensed under Apache License v2.0 with Runtime Library Exception // // See https://swift.org/LICENSE.txt for license information @@ -308,7 +308,11 @@ struct VariadicsGenerator: ParsableCommand { output(""" { let factory = makeFactory() - return factory.accumulate(accumulated, next) + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) + } else { + return factory.accumulate(accumulated, next) + } } } diff --git a/Sources/_RegexParser/Regex/Parse/CaptureList.swift b/Sources/_RegexParser/Regex/Parse/CaptureList.swift index 5c448dc5a..8ea1ab682 100644 --- a/Sources/_RegexParser/Regex/Parse/CaptureList.swift +++ b/Sources/_RegexParser/Regex/Parse/CaptureList.swift @@ -27,16 +27,19 @@ extension CaptureList { public var type: Any.Type public var optionalDepth: Int public var location: SourceLocation + public var visibleInTypedOutput: Bool public init( name: String? 
= nil, type: Any.Type = Substring.self, optionalDepth: Int, + visibleInTypedOutput: Bool, _ location: SourceLocation ) { self.name = name self.type = type self.optionalDepth = optionalDepth + self.visibleInTypedOutput = visibleInTypedOutput self.location = location } } @@ -104,58 +107,60 @@ extension CaptureList { extension CaptureList.Builder { public mutating func addCaptures( - of node: AST.Node, optionalNesting nesting: OptionalNesting + of node: AST.Node, + optionalNesting nesting: OptionalNesting, + visibleInTypedOutput: Bool ) { switch node { case let .alternation(a): for child in a.children { - addCaptures(of: child, optionalNesting: nesting.addingOptional) + addCaptures(of: child, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) } case let .concatenation(c): for child in c.children { - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) } case let .group(g): switch g.kind.value { case .capture: - captures.append(.init(optionalDepth: nesting.depth, g.location)) + captures.append(.init(optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location)) case .namedCapture(let name): captures.append(.init( - name: name.value, optionalDepth: nesting.depth, g.location)) + name: name.value, optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location)) case .balancedCapture(let b): captures.append(.init( - name: b.name?.value, optionalDepth: nesting.depth, g.location)) + name: b.name?.value, optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, g.location)) default: break } - addCaptures(of: g.child, optionalNesting: nesting) + addCaptures(of: g.child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case .conditional(let c): switch c.condition.kind { case .group(let g): - addCaptures(of: .group(g), optionalNesting: nesting) + addCaptures(of: .group(g), 
optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) default: break } - addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional) - addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional) + addCaptures(of: c.trueBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) + addCaptures(of: c.falseBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) case .quantification(let q): var optNesting = nesting if q.amount.value.bounds.atLeast == 0 { optNesting = optNesting.addingOptional } - addCaptures(of: q.child, optionalNesting: optNesting) + addCaptures(of: q.child, optionalNesting: optNesting, visibleInTypedOutput: visibleInTypedOutput) case .absentFunction(let abs): switch abs.kind { case .expression(_, _, let child): - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case .clearer, .repeater, .stopper: break } @@ -166,8 +171,8 @@ extension CaptureList.Builder { } public static func build(_ ast: AST) -> CaptureList { var builder = Self() - builder.captures.append(.init(optionalDepth: 0, .fake)) - builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false)) + builder.captures.append(.init(optionalDepth: 0, visibleInTypedOutput: true, .fake)) + builder.addCaptures(of: ast.root, optionalNesting: .init(canNest: false), visibleInTypedOutput: true) return builder.captures } } diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift index e0a6c7465..15e052901 100644 --- a/Sources/_StringProcessing/ByteCodeGen.swift +++ b/Sources/_StringProcessing/ByteCodeGen.swift @@ -874,7 +874,7 @@ fileprivate extension Compiler.ByteCodeGen { switch node { case .concatenation(let ch): return ch.flatMap(flatten) - case .convertedRegexLiteral(let n, _): + case .convertedRegexLiteral(let n, _), 
.ignoreCapturesInTypedOutput(let n): return flatten(n) default: return [node] @@ -951,6 +951,9 @@ fileprivate extension Compiler.ByteCodeGen { case let .nonCapturingGroup(kind, child): try emitNoncapturingGroup(kind.ast, child) + case let .ignoreCapturesInTypedOutput(child): + try emitNode(child) + case .conditional: throw Unsupported("Conditionals") diff --git a/Sources/_StringProcessing/Capture.swift b/Sources/_StringProcessing/Capture.swift index b75d01392..696a85361 100644 --- a/Sources/_StringProcessing/Capture.swift +++ b/Sources/_StringProcessing/Capture.swift @@ -61,7 +61,7 @@ extension Sequence where Element == AnyRegexOutput.Element { // and traffic through existentials @available(SwiftStdlib 5.7, *) func existentialOutput(from input: String) -> Any { - let elements = map { + let elements = filter(\.representation.visibleInTypedOutput).map { $0.existentialOutputComponent(from: input) } return elements.count == 1 diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 3a2731b0a..705b354fb 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -42,7 +42,7 @@ extension DSLTree.Node { case .orderedChoice, .conditional, .concatenation, .capture, .nonCapturingGroup, .quantification, .trivia, .empty, - .absentFunction: return nil + .ignoreCapturesInTypedOutput, .absentFunction: return nil case .consumer: fatalError("FIXME: Is this where we handle them?") diff --git a/Sources/_StringProcessing/Engine/Structuralize.swift b/Sources/_StringProcessing/Engine/Structuralize.swift index bc3adf701..32d7a6204 100644 --- a/Sources/_StringProcessing/Engine/Structuralize.swift +++ b/Sources/_StringProcessing/Engine/Structuralize.swift @@ -14,7 +14,8 @@ extension CaptureList { optionalDepth: cap.optionalDepth, content: meStored.deconstructed, name: cap.name, - referenceID: list.referencedCaptureOffsets.first { $1 == i }?.key + referenceID: 
list.referencedCaptureOffsets.first { $1 == i }?.key, + visibleInTypedOutput: cap.visibleInTypedOutput ) result.append(element) diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift index 953df6882..8b456f37d 100644 --- a/Sources/_StringProcessing/PrintAsPattern.swift +++ b/Sources/_StringProcessing/PrintAsPattern.swift @@ -131,6 +131,9 @@ extension PrettyPrinter { printer.printAsPattern(convertedFromAST: child) } + case let .ignoreCapturesInTypedOutput(child): + printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel) + case .conditional: print("/* TODO: conditional */") diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index fd292ed1b..243c1ba01 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -359,6 +359,10 @@ extension AnyRegexOutput { /// The capture reference this element refers to. var referenceID: ReferenceID? = nil + + /// A Boolean value indicating whether this capture should be included in + /// the typed output. + var visibleInTypedOutput: Bool } internal init(input: String, elements: [ElementRepresentation]) { diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 0a0831706..93e86c607 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -42,6 +42,9 @@ extension DSLTree { /// Matches a noncapturing subpattern. case nonCapturingGroup(_AST.GroupKind, Node) + /// Marks all captures in a subpattern as ignored in strongly-typed output. 
+ case ignoreCapturesInTypedOutput(Node) + // TODO: Consider splitting off grouped conditions, or have // our own kind @@ -340,6 +343,27 @@ typealias _CharacterPredicateInterface = ( */ extension DSLTree.Node { + /// Indicates whether this node has at least one child node (among other + /// associated values). + var hasChildNodes: Bool { + switch self { + case .trivia, .empty, .quotedLiteral, + .consumer, .matcher, .characterPredicate, + .customCharacterClass, .atom: + return false + + case .orderedChoice(let c), .concatenation(let c): + return !c.isEmpty + + case .convertedRegexLiteral, .capture, .nonCapturingGroup, + .quantification, .ignoreCapturesInTypedOutput, .conditional: + return true + + case .absentFunction(let abs): + return !abs.ast.children.isEmpty + } + } + @_spi(RegexBuilder) public var children: [DSLTree.Node] { switch self { @@ -354,6 +378,7 @@ extension DSLTree.Node { case let .capture(_, _, n, _): return [n] case let .nonCapturingGroup(_, n): return [n] case let .quantification(_, _, n): return [n] + case let .ignoreCapturesInTypedOutput(n): return [n] case let .conditional(_, t, f): return [t,f] @@ -403,11 +428,13 @@ extension DSLTree { } extension DSLTree { + /// Indicates whether this DSLTree contains any capture groups. var hasCapture: Bool { root.hasCapture } } extension DSLTree.Node { + /// Indicates whether this DSLTree node contains any capture groups. 
var hasCapture: Bool { switch self { case .capture: @@ -572,52 +599,55 @@ struct CaptureTransform: Hashable, CustomStringConvertible { extension CaptureList.Builder { mutating func addCaptures( - of node: DSLTree.Node, optionalNesting nesting: OptionalNesting + of node: DSLTree.Node, optionalNesting nesting: OptionalNesting, visibleInTypedOutput: Bool ) { switch node { case let .orderedChoice(children): for child in children { - addCaptures(of: child, optionalNesting: nesting.addingOptional) + addCaptures(of: child, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) } case let .concatenation(children): for child in children { - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) } case let .capture(name, _, child, transform): captures.append(.init( name: name, type: transform?.resultType ?? child.wholeMatchType, - optionalDepth: nesting.depth, .fake)) - addCaptures(of: child, optionalNesting: nesting) + optionalDepth: nesting.depth, visibleInTypedOutput: visibleInTypedOutput, .fake)) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case let .nonCapturingGroup(kind, child): assert(!kind.ast.isCapturing) - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) + + case let .ignoreCapturesInTypedOutput(child): + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: false) case let .conditional(cond, trueBranch, falseBranch): switch cond.ast { case .group(let g): - addCaptures(of: .group(g), optionalNesting: nesting) + addCaptures(of: .group(g), optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) default: break } - addCaptures(of: trueBranch, optionalNesting: nesting.addingOptional) - addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional) + addCaptures(of: 
trueBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) + addCaptures(of: falseBranch, optionalNesting: nesting.addingOptional, visibleInTypedOutput: visibleInTypedOutput) case let .quantification(amount, _, child): var optNesting = nesting if amount.ast.bounds.atLeast == 0 { optNesting = optNesting.addingOptional } - addCaptures(of: child, optionalNesting: optNesting) + addCaptures(of: child, optionalNesting: optNesting, visibleInTypedOutput: visibleInTypedOutput) case let .absentFunction(abs): switch abs.ast.kind { case .expression(_, _, let child): - addCaptures(of: child, optionalNesting: nesting) + addCaptures(of: child, optionalNesting: nesting, visibleInTypedOutput: visibleInTypedOutput) case .clearer, .repeater, .stopper: break } @@ -625,7 +655,7 @@ extension CaptureList.Builder { case let .convertedRegexLiteral(n, _): // We disable nesting for converted AST trees, as literals do not nest // captures. This includes literals nested in a DSL. - return addCaptures(of: n, optionalNesting: nesting.disablingNesting) + return addCaptures(of: n, optionalNesting: nesting.disablingNesting, visibleInTypedOutput: visibleInTypedOutput) case .matcher: break @@ -639,8 +669,8 @@ extension CaptureList.Builder { static func build(_ dsl: DSLTree) -> CaptureList { var builder = Self() builder.captures.append( - .init(type: dsl.root.wholeMatchType, optionalDepth: 0, .fake)) - builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true)) + .init(type: dsl.root.wholeMatchType, optionalDepth: 0, visibleInTypedOutput: true, .fake)) + builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true), visibleInTypedOutput: true) return builder.captures } } @@ -650,7 +680,7 @@ extension DSLTree.Node { /// output but forwarding its only child's output. 
var isOutputForwarding: Bool { switch self { - case .nonCapturingGroup: + case .nonCapturingGroup, .ignoreCapturesInTypedOutput: return true case .orderedChoice, .concatenation, .capture, .conditional, .quantification, .customCharacterClass, .atom, @@ -710,6 +740,7 @@ extension DSLTree { case let .capture(_, _, n, _): return [_Tree(n)] case let .nonCapturingGroup(_, n): return [_Tree(n)] case let .quantification(_, _, n): return [_Tree(n)] + case let .ignoreCapturesInTypedOutput(n): return [_Tree(n)] case let .conditional(_, t, f): return [_Tree(t), _Tree(f)] diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index e0df906fa..584772921 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -20,6 +20,16 @@ public struct _RegexFactory { // Hide is behind an SPI that only RegexBuilder can use. @_spi(RegexBuilder) public init() {} + + @available(SwiftStdlib 5.8, *) + public func ignoreCapturesInTypedOutput( + _ child: some RegexComponent + ) -> Regex { + // Don't wrap `child` again if it's a leaf node. + child.regex.root.hasChildNodes + ? 
.init(node: .ignoreCapturesInTypedOutput(child.regex.root)) + : .init(node: child.regex.root) + } @available(SwiftStdlib 5.7, *) public func accumulate( diff --git a/Tests/DocumentationTests/RegexBuilderTests.swift b/Tests/DocumentationTests/RegexBuilderTests.swift index d0ae36e01..0f1813e40 100644 --- a/Tests/DocumentationTests/RegexBuilderTests.swift +++ b/Tests/DocumentationTests/RegexBuilderTests.swift @@ -204,4 +204,117 @@ extension RegexBuilderTests { XCTAssertEqual(matches.count, 1) XCTAssertEqual(matches[0].1, 121.54) } + + func testLabeledCapturesInDSL() throws { + let oneNumericField = "abc:123:def" + let twoNumericFields = "abc:123:def:456:ghi" + + let regexWithCapture = #/:(\d+):/# + let regexWithLabeledCapture = #/:(?\d+):/# + let regexWithNonCapture = #/:(?:\d+):/# + + do { + // The output type of a regex with unlabeled captures is concatenated. + let dslWithCapture = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) + + let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) + XCTAssertEqual(output.0, oneNumericField[...]) + XCTAssertEqual(output.1, "123") + } + do { + // The output type of a regex with a labeled capture is dropped. + let dslWithLabeledCapture = Regex { + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithLabeledCapture).self == Regex.self) + + let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) + XCTAssertEqual(match.output, oneNumericField[...]) + + // We can recover the ignored captures by converting to `AnyRegexOutput`. 
+ let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 2) + XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput["number"]?.substring, "123") + } + do { + let coalescingWithCapture = Regex { + "e" as Character + #/\u{301}(\d*)/# + } + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "é")) + + let coalescingWithLabeledCapture = Regex { + "e" as Character + #/\u{301}(?\d*)/# + } + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) + } + do { + // Only the output type of a regex with a labeled capture is dropped, + // outputs of other regexes in the same DSL are concatenated. + let dslWithBothCaptures = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) + + let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) + XCTAssertEqual(match.output.0, twoNumericFields[...]) + XCTAssertEqual(match.output.1, "123") + + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 3) + XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput[2].substring, "456") + } + do { + // The output type of a regex with too many captures is dropped. + // "Too many" means the left and right output types would add up to >= 10. 
+ let alpha = "AAA:abcdefghijklm:123:456:" + let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# + let dslWithTooManyCaptures = Regex { + Capture(OneOrMore(.word)) + ":" + regexWithTooManyCaptures + ":" + TryCapture(OneOrMore(.word)) { Int($0) } + #/:(\d+):/# + } + XCTAssert(type(of: dslWithTooManyCaptures).self + == Regex<(Substring, Substring, Int, Substring)>.self) + + let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) + XCTAssertEqual(match.output.0, alpha[...]) + XCTAssertEqual(match.output.1, "AAA") + XCTAssertEqual(match.output.2, 123) + XCTAssertEqual(match.output.3, "456") + + // All capture groups are available through `AnyRegexOutput`. + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 17) + XCTAssertEqual(anyOutput[0].substring, alpha[...]) + XCTAssertEqual(anyOutput[1].substring, "AAA") + for (offset, letter) in "abcdefghijklm".enumerated() { + XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) + } + XCTAssertEqual(anyOutput[15].substring, "123") + XCTAssertEqual(anyOutput[15].value as? 
Int, 123) + XCTAssertEqual(anyOutput[16].substring, "456") + } + } } diff --git a/Tests/RegexTests/CaptureTests.swift b/Tests/RegexTests/CaptureTests.swift index 26093bc64..85aecd210 100644 --- a/Tests/RegexTests/CaptureTests.swift +++ b/Tests/RegexTests/CaptureTests.swift @@ -16,15 +16,15 @@ import XCTest extension CaptureList.Capture { static var cap: Self { - return Self(optionalDepth: 0, .fake) + return Self(optionalDepth: 0, visibleInTypedOutput: true, .fake) } static var opt: Self { - return Self(optionalDepth: 1, .fake) + return Self(optionalDepth: 1, visibleInTypedOutput: true, .fake) } static func named(_ name: String, opt: Int = 0) -> Self { - return Self(name: name, optionalDepth: opt, .fake) + return Self(name: name, optionalDepth: opt, visibleInTypedOutput: true, .fake) } } extension CaptureList { From 95b23d4abe97946f5dfc774e69e90eeaa2d35667 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 6 Feb 2023 12:06:22 -0800 Subject: [PATCH 2/9] Resolve more cases of output type mismatch Following on @rxwei's note about wholeMatchType, I found some more instances where the DSLTree-generated output type doesn't match what the builder overloads produce. (In particular, when the "noncompliant" regex component is the first one in the tree.) This change catches those as well, and includes some additional tests for those cases. 
--- Sources/RegexBuilder/DSL.swift | 62 +++++++ Sources/RegexBuilder/Variadics.swift | 101 +++--------- .../VariadicsGenerator.swift | 33 +++- Tests/RegexBuilderTests/CustomTests.swift | 151 ++++++++++++++---- Tests/RegexBuilderTests/MotivationTests.swift | 6 +- 5 files changed, 232 insertions(+), 121 deletions(-) diff --git a/Sources/RegexBuilder/DSL.swift b/Sources/RegexBuilder/DSL.swift index 152aadd0c..680f3bd2f 100644 --- a/Sources/RegexBuilder/DSL.swift +++ b/Sources/RegexBuilder/DSL.swift @@ -508,3 +508,65 @@ extension Regex.Match { internal func makeFactory() -> _RegexFactory { _RegexFactory() } + +/// These are special `accumulate` methods that wrap one or both components in +/// a node that indicates that their output types shouldn't be included in +/// the resulting strongly-typed output type. This is required from a +/// `buildPartialBlock` call where a component's output type is either ignored +/// or not included in the resulting type. For example: +/// +/// static func buildPartialBlock( +/// accumulated: R0, next: R1 +/// ) -> Regex<(Substring, C1)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1) +/// +/// In this `buildPartialBlock` overload, `W0` isn't included in the +/// resulting output type, even though it can match any output type, including +/// a tuple. When `W0` matches a tuple type that doesn't match another overload +/// (because of arity or labels) we need this "ignoring" variant so that we +/// don't have a type mismatch when we ultimately cast the type-erased output +/// to the expected type. +@available(SwiftStdlib 5.7, *) +extension _RegexFactory { + /// Concatenates the `left` and `right` component, wrapping `right` to + /// indicate that its output type shouldn't be included in the resulting + /// strongly-typed output type. 
+ @_alwaysEmitIntoClient + internal func accumulate( + _ left: some RegexComponent, + ignoringOutputTypeOf right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate(left, ignoreCapturesInTypedOutput(right)) + } + return accumulate(left, right) + } + + /// Concatenates the `left` and `right` component, wrapping `left` to + /// indicate that its output type shouldn't be included in the resulting + /// strongly-typed output type. + @_alwaysEmitIntoClient + internal func accumulate( + ignoringOutputTypeOf left: some RegexComponent, + _ right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate(ignoreCapturesInTypedOutput(left), right) + } + return accumulate(left, right) + } + + /// Concatenates the `left` and `right` component, wrapping both sides to + /// indicate that their output types shouldn't be included in the resulting + /// strongly-typed output type. 
+ @_alwaysEmitIntoClient + internal func accumulate( + ignoringOutputTypeOf left: some RegexComponent, + andAlso right: some RegexComponent + ) -> Regex { + if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { + return accumulate( + ignoreCapturesInTypedOutput(left), ignoreCapturesInTypedOutput(right)) + } + return accumulate(left, right) + } +} diff --git a/Sources/RegexBuilder/Variadics.swift b/Sources/RegexBuilder/Variadics.swift index cbd11e192..f11727521 100644 --- a/Sources/RegexBuilder/Variadics.swift +++ b/Sources/RegexBuilder/Variadics.swift @@ -20,7 +20,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -30,7 +30,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -40,7 +40,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -50,7 +50,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -60,7 
+60,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -70,7 +70,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -80,7 +80,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -90,7 +90,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -100,7 +100,7 @@ extension RegexComponentBuilder { accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -110,7 +110,7 @@ extension RegexComponentBuilder { 
accumulated: R0, next: R1 ) -> Regex<(Substring, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10)> where R0.RegexOutput == W0, R1.RegexOutput == (W1, C1, C2, C3, C4, C5, C6, C7, C8, C9, C10) { let factory = makeFactory() - return factory.accumulate(accumulated, next) + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) } } @available(SwiftStdlib 5.7, *) @@ -565,167 +565,112 @@ extension RegexComponentBuilder { } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex where R0.RegexOutput == W0 { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(ignoringOutputTypeOf: accumulated, andAlso: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0)> where R0.RegexOutput == (W0, C0) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1)> where R0.RegexOutput == (W0, C0, C1) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return 
factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2)> where R0.RegexOutput == (W0, C0, C1, C2) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3)> where R0.RegexOutput == (W0, C0, C1, C2, C3) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient 
public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8)> where R0.RegexOutput == (W0, C0, C1, C2, 
C3, C4, C5, C6, C7, C8) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @available(SwiftStdlib 5.7, *) extension RegexComponentBuilder { - @available(SwiftStdlib 5.7, *) @_alwaysEmitIntoClient public static func buildPartialBlock( accumulated: R0, next: R1 ) -> Regex<(Substring, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9)> where R0.RegexOutput == (W0, C0, C1, C2, C3, C4, C5, C6, C7, C8, C9) { let factory = makeFactory() - if #available(macOS 9999, iOS 9999, watchOS 9999, tvOS 9999, *) { - return factory.accumulate(accumulated, factory.ignoreCapturesInTypedOutput(next)) - } else { - return factory.accumulate(accumulated, next) - } + return factory.accumulate(accumulated, ignoringOutputTypeOf: next) } } @@ -6928,7 +6873,3 @@ extension TryCapture { self.init(factory.captureOptional(componentBuilder(), reference._raw, transform)) } } - - - -// END AUTO-GENERATED CONTENT diff --git a/Sources/VariadicsGenerator/VariadicsGenerator.swift b/Sources/VariadicsGenerator/VariadicsGenerator.swift index 092b98da7..42673ccfd 100644 --- a/Sources/VariadicsGenerator/VariadicsGenerator.swift +++ b/Sources/VariadicsGenerator/VariadicsGenerator.swift @@ -262,7 +262,20 @@ struct VariadicsGenerator: ParsableCommand { accumulated: R0, next: R1 ) -> \(regexTypeName)<\(matchType)> \(whereClause) { let factory = makeFactory() + + """) + if leftArity == 0 { + output(""" + return factory.accumulate(ignoringOutputTypeOf: accumulated, next) + + """) + } else { + output(""" return factory.accumulate(accumulated, next) + + """) + } + output(""" } } @@ -274,7 +287,6 @@ struct VariadicsGenerator: ParsableCommand { output(""" \(defaultAvailableAttr) extension \(concatBuilderName) { - \(defaultAvailableAttr) @_alwaysEmitIntoClient public 
static func buildPartialBlock( _ regex: Regex, - _ tests: (input: String, call: MatchCall, match: Match?)... + _ tests: (input: String, call: MatchCall, match: Match?)..., + file: StaticString = #file, + line: UInt = #line ) { for (input, call, match) in tests { let result: Match? @@ -129,7 +131,40 @@ func customTest( case .firstMatch: result = input.firstMatch(of: regex)?.output } - XCTAssertEqual(result, match) + XCTAssertEqual(result, match, file: file, line: line) + } +} + +func customTest( + _ regex: Regex, + _ isEquivalent: (Match, Match) -> Bool, + _ tests: (input: String, call: MatchCall, match: Match?)..., + file: StaticString = #file, + line: UInt = #line +) { + for (input, call, match) in tests { + let result: Match? + switch call { + case .match: + result = input.wholeMatch(of: regex)?.output + case .firstMatch: + result = input.firstMatch(of: regex)?.output + } + switch (result, match) { + case let (result?, match?): + XCTAssert( + isEquivalent(result, match), + "'\(result)' isn't equal to '\(match)'.", + file: file, line: line) + case (nil, nil): + // Success + break + case (nil, _): + XCTFail("No match when expected", file: file, line: line) + case (_, nil): + XCTFail("Unexpected match", file: file, line: line) + } + } } @@ -210,40 +245,92 @@ class CustomRegexComponentTests: XCTestCase { ("abc", .firstMatch, nil), ("55z", .match, nil), ("55z", .firstMatch, 5)) + + customTest( + Regex<(Substring, Substring, Int)> { + #/(\D+)/# + Capture(Numbler()) + }, + ==, + ("ab123c", .firstMatch, ("ab1", "ab", 1)), + ("abc", .firstMatch, nil), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, ("a5", "a", 5))) + + customTest( + Regex<(Substring, prefix: Substring)> { + #/(?\D+)/# + }, + ==, + ("ab123c", .firstMatch, ("ab", "ab")), + ("abc", .firstMatch, ("abc", "abc")), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, ("a", "a"))) - // TODO: Convert below tests to better infra. 
Right now - // it's hard because `Match` is constrained to be - // `Equatable` which tuples cannot be. + customTest( + Regex { + #/(?\D+)/# + Optionally("~") + }, + ("ab123c", .firstMatch, "ab"), + ("abc", .firstMatch, "abc"), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, "a")) - let regex3 = Regex { - Capture { + customTest( + Regex<(Substring, Int)> { + #/(?\D+)/# + Capture(Numbler()) + }, + ==, + ("ab123c", .firstMatch, ("ab1", 1)), + ("abc", .firstMatch, nil), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, ("a5", 5))) + + customTest( + Regex<(Substring, Int, Substring)> { + #/(?\D+)/# + Regex { + Capture(Numbler()) + Capture(OneOrMore(.word)) + } + }, + ==, + ("ab123c", .firstMatch, ("ab123c", 1, "23c")), + ("abc", .firstMatch, nil), + ("123", .firstMatch, nil), + ("a55z", .match, ("a55z", 5, "5z")), + ("a55z", .firstMatch, ("a55z", 5, "5z"))) + + customTest( + Regex<(Substring, Substring)> { + Capture { + OneOrMore { + Numbler() + } + } + }, + ==, + ("abc123", .firstMatch, ("123", "123")), + ("abc123", .match, nil), + ("abc", .firstMatch, nil)) + + customTest( + Regex<(Substring, Int)> { OneOrMore { - Numbler() + Capture { Numbler() } } - } - } - - let str = "ab123c" - let res3 = try XCTUnwrap(str.firstMatch(of: regex3)) - - let expectedSubstring = str.dropFirst(2).prefix(3) - XCTAssertEqual(res3.range, expectedSubstring.startIndex.. TransactionKind? in TransactionKind(rawValue: String(s)) } @@ -322,7 +322,7 @@ extension RegexDSLTests { Repeat(.digit, count: 2) Repeat(.digit, count: 2) Repeat(.digit, count: 4) - } transform: { (s: Substring) in + } transform: { (s: Substring) -> Date? in Date(mmddyyyy: String(s)) } @@ -345,7 +345,7 @@ extension RegexDSLTests { OneOrMore(.digit) "." Repeat(.digit, count: 2) - } transform: { (s: Substring) in + } transform: { (s: Substring) -> Double? 
in Double(s) } } From 1aca1fa870935bb5e4b81405bec7bd08153e666d Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 6 Feb 2023 17:09:42 -0800 Subject: [PATCH 3/9] Work around Linux test crasher Linux seems to crash on different tests when the two customTest overloads have `internal` visibility or are called. Temporarily skipping those tests on Linux while I try to reduce the problem further (which doesn't make much sense to me). --- Tests/RegexBuilderTests/CustomTests.swift | 33 ++++++++++++----------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index 81a1fdb30..d0c0feba1 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -117,7 +117,7 @@ enum MatchCall { case firstMatch } -func customTest( +fileprivate func customTest( _ regex: Regex, _ tests: (input: String, call: MatchCall, match: Match?)..., file: StaticString = #file, @@ -135,7 +135,7 @@ func customTest( } } -func customTest( +fileprivate func customTest( _ regex: Regex, _ isEquivalent: (Match, Match) -> Bool, _ tests: (input: String, call: MatchCall, match: Match?)..., @@ -164,7 +164,6 @@ func customTest( case (_, nil): XCTFail("Unexpected match", file: file, line: line) } - } } @@ -245,7 +244,19 @@ class CustomRegexComponentTests: XCTestCase { ("abc", .firstMatch, nil), ("55z", .match, nil), ("55z", .firstMatch, 5)) - + + customTest( + Regex { + #/(?\D+)/# + Optionally("~") + }, + ("ab123c", .firstMatch, "ab"), + ("abc", .firstMatch, "abc"), + ("123", .firstMatch, nil), + ("a55z", .match, nil), + ("a55z", .firstMatch, "a")) + +#if !os(Linux) customTest( Regex<(Substring, Substring, Int)> { #/(\D+)/# @@ -257,7 +268,7 @@ class CustomRegexComponentTests: XCTestCase { ("123", .firstMatch, nil), ("a55z", .match, nil), ("a55z", .firstMatch, ("a5", "a", 5))) - + customTest( Regex<(Substring, prefix: Substring)> { #/(?\D+)/# @@ -269,17 +280,6 @@ class 
CustomRegexComponentTests: XCTestCase { ("a55z", .match, nil), ("a55z", .firstMatch, ("a", "a"))) - customTest( - Regex { - #/(?\D+)/# - Optionally("~") - }, - ("ab123c", .firstMatch, "ab"), - ("abc", .firstMatch, "abc"), - ("123", .firstMatch, nil), - ("a55z", .match, nil), - ("a55z", .firstMatch, "a")) - customTest( Regex<(Substring, Int)> { #/(?\D+)/# @@ -331,6 +331,7 @@ class CustomRegexComponentTests: XCTestCase { ("abc", .firstMatch, nil), ("55z", .match, nil), ("55z", .firstMatch, ("55", 5))) +#endif } func testRegexAbort() { From 573174016629512cddf1eb93e795c6e43f6b27e9 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 7 Feb 2023 09:10:55 -0800 Subject: [PATCH 4/9] Improve workaround for crashing test Seems to be happening on macOS, too. --- Tests/RegexBuilderTests/CustomTests.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index d0c0feba1..bbf26de21 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -136,7 +136,7 @@ fileprivate func customTest( } fileprivate func customTest( - _ regex: Regex, + _ regex: some RegexComponent, _ isEquivalent: (Match, Match) -> Bool, _ tests: (input: String, call: MatchCall, match: Match?)..., file: StaticString = #file, From c64477e0d3f6510892762e3be6fc3b37821b934e Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 7 Feb 2023 15:48:07 -0800 Subject: [PATCH 5/9] Include availability information in RegexBuilderTests We've had availability turned off for RegexBuilderTests (presumably for convenience), but then we can't test functionality that depends on availability, like the fix for RegexBuilder output type mismatches. 
--- Package.swift | 2 +- Tests/RegexBuilderTests/AlgorithmsTests.swift | 2 + .../AnyRegexOutputTests.swift | 1 + Tests/RegexBuilderTests/CustomTests.swift | 81 ++++++++++--------- Tests/RegexBuilderTests/MotivationTests.swift | 1 + Tests/RegexBuilderTests/RegexDSLTests.swift | 1 + 6 files changed, 50 insertions(+), 38 deletions(-) diff --git a/Package.swift b/Package.swift index ba3c867de..5d45950db 100644 --- a/Package.swift +++ b/Package.swift @@ -91,7 +91,7 @@ let package = Package( name: "RegexBuilderTests", dependencies: ["_StringProcessing", "RegexBuilder", "TestSupport"], swiftSettings: [ - .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]) + availabilityDefinition ]), .testTarget( name: "DocumentationTests", diff --git a/Tests/RegexBuilderTests/AlgorithmsTests.swift b/Tests/RegexBuilderTests/AlgorithmsTests.swift index dcaddd9d7..7d24e30af 100644 --- a/Tests/RegexBuilderTests/AlgorithmsTests.swift +++ b/Tests/RegexBuilderTests/AlgorithmsTests.swift @@ -13,6 +13,7 @@ import XCTest import _StringProcessing import RegexBuilder +@available(SwiftStdlib 5.7, *) class RegexConsumerTests: XCTestCase { func testMatches() { let regex = Capture(OneOrMore(.digit)) { 2 * Int($0)! 
} @@ -105,6 +106,7 @@ class RegexConsumerTests: XCTestCase { } } +@available(SwiftStdlib 5.7, *) class AlgorithmsResultBuilderTests: XCTestCase { enum MatchAlgo { case whole diff --git a/Tests/RegexBuilderTests/AnyRegexOutputTests.swift b/Tests/RegexBuilderTests/AnyRegexOutputTests.swift index e6c3214b9..165d1d411 100644 --- a/Tests/RegexBuilderTests/AnyRegexOutputTests.swift +++ b/Tests/RegexBuilderTests/AnyRegexOutputTests.swift @@ -5,6 +5,7 @@ import RegexBuilder private let enablePrinting = false +@available(SwiftStdlib 5.7, *) extension RegexDSLTests { func testContrivedAROExample() { diff --git a/Tests/RegexBuilderTests/CustomTests.swift b/Tests/RegexBuilderTests/CustomTests.swift index bbf26de21..85186b684 100644 --- a/Tests/RegexBuilderTests/CustomTests.swift +++ b/Tests/RegexBuilderTests/CustomTests.swift @@ -14,10 +14,12 @@ import _StringProcessing @testable import RegexBuilder // A nibbler processes a single character from a string +@available(SwiftStdlib 5.7, *) private protocol Nibbler: CustomConsumingRegexComponent { func nibble(_: Character) -> RegexOutput? } +@available(SwiftStdlib 5.7, *) extension Nibbler { // Default implementation, just feed the character in func consuming( @@ -34,6 +36,7 @@ extension Nibbler { // A number nibbler +@available(SwiftStdlib 5.7, *) private struct Numbler: Nibbler { typealias RegexOutput = Int func nibble(_ c: Character) -> Int? { @@ -42,6 +45,7 @@ private struct Numbler: Nibbler { } // An ASCII value nibbler +@available(SwiftStdlib 5.7, *) private struct Asciibbler: Nibbler { typealias RegexOutput = UInt8 func nibble(_ c: Character) -> UInt8? 
{ @@ -49,6 +53,7 @@ private struct Asciibbler: Nibbler { } } +@available(SwiftStdlib 5.7, *) private struct IntParser: CustomConsumingRegexComponent { struct ParseError: Error, Hashable {} typealias RegexOutput = Int @@ -71,6 +76,7 @@ private struct IntParser: CustomConsumingRegexComponent { } } +@available(SwiftStdlib 5.7, *) private struct CurrencyParser: CustomConsumingRegexComponent { enum Currency: String, Hashable { case usd = "USD" @@ -117,6 +123,7 @@ enum MatchCall { case firstMatch } +@available(SwiftStdlib 5.7, *) fileprivate func customTest( _ regex: Regex, _ tests: (input: String, call: MatchCall, match: Match?)..., @@ -135,6 +142,7 @@ fileprivate func customTest( } } +@available(SwiftStdlib 5.7, *) fileprivate func customTest( _ regex: some RegexComponent, _ isEquivalent: (Match, Match) -> Bool, @@ -212,6 +220,7 @@ extension Concat: BidirectionalCollection { } } +@available(SwiftStdlib 5.7, *) class CustomRegexComponentTests: XCTestCase { // TODO: Refactor below into more exhaustive, declarative // tests. 
@@ -245,18 +254,17 @@ class CustomRegexComponentTests: XCTestCase { ("55z", .match, nil), ("55z", .firstMatch, 5)) - customTest( - Regex { - #/(?\D+)/# - Optionally("~") - }, - ("ab123c", .firstMatch, "ab"), - ("abc", .firstMatch, "abc"), - ("123", .firstMatch, nil), - ("a55z", .match, nil), - ("a55z", .firstMatch, "a")) +// customTest( +// Regex { +// #/(?\D+)/# +// Optionally("~") +// }, +// ("ab123c", .firstMatch, "ab"), +// ("abc", .firstMatch, "abc"), +// ("123", .firstMatch, nil), +// ("a55z", .match, nil), +// ("a55z", .firstMatch, "a")) -#if !os(Linux) customTest( Regex<(Substring, Substring, Int)> { #/(\D+)/# @@ -280,32 +288,32 @@ class CustomRegexComponentTests: XCTestCase { ("a55z", .match, nil), ("a55z", .firstMatch, ("a", "a"))) - customTest( - Regex<(Substring, Int)> { - #/(?\D+)/# - Capture(Numbler()) - }, - ==, - ("ab123c", .firstMatch, ("ab1", 1)), - ("abc", .firstMatch, nil), - ("123", .firstMatch, nil), - ("a55z", .match, nil), - ("a55z", .firstMatch, ("a5", 5))) +// customTest( +// Regex<(Substring, Int)> { +// #/(?\D+)/# +// Capture(Numbler()) +// }, +// ==, +// ("ab123c", .firstMatch, ("ab1", 1)), +// ("abc", .firstMatch, nil), +// ("123", .firstMatch, nil), +// ("a55z", .match, nil), +// ("a55z", .firstMatch, ("a5", 5))) - customTest( - Regex<(Substring, Int, Substring)> { - #/(?\D+)/# - Regex { - Capture(Numbler()) - Capture(OneOrMore(.word)) - } - }, - ==, - ("ab123c", .firstMatch, ("ab123c", 1, "23c")), - ("abc", .firstMatch, nil), - ("123", .firstMatch, nil), - ("a55z", .match, ("a55z", 5, "5z")), - ("a55z", .firstMatch, ("a55z", 5, "5z"))) +// customTest( +// Regex<(Substring, Int, Substring)> { +// #/(?\D+)/# +// Regex { +// Capture(Numbler()) +// Capture(OneOrMore(.word)) +// } +// }, +// ==, +// ("ab123c", .firstMatch, ("ab123c", 1, "23c")), +// ("abc", .firstMatch, nil), +// ("123", .firstMatch, nil), +// ("a55z", .match, ("a55z", 5, "5z")), +// ("a55z", .firstMatch, ("a55z", 5, "5z"))) customTest( Regex<(Substring, Substring)> { @@ 
-331,7 +339,6 @@ class CustomRegexComponentTests: XCTestCase { ("abc", .firstMatch, nil), ("55z", .match, nil), ("55z", .firstMatch, ("55", 5))) -#endif } func testRegexAbort() { diff --git a/Tests/RegexBuilderTests/MotivationTests.swift b/Tests/RegexBuilderTests/MotivationTests.swift index c1ecb50fa..06511d5b8 100644 --- a/Tests/RegexBuilderTests/MotivationTests.swift +++ b/Tests/RegexBuilderTests/MotivationTests.swift @@ -261,6 +261,7 @@ extension RegexDSLTests { #endif +@available(SwiftStdlib 5.7, *) extension RegexDSLTests { func testProposalExample() { let statement = """ diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 8b7611536..7882a2d97 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -14,6 +14,7 @@ import _StringProcessing import RegexBuilder import TestSupport +@available(SwiftStdlib 5.7, *) class RegexDSLTests: XCTestCase { func _testDSLCaptures( _ tests: (input: String, expectedCaptures: MatchType?)..., From 034b582c30914f5dd41d564ce621af9f28538778 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 8 Feb 2023 10:10:57 -0800 Subject: [PATCH 6/9] Move `testLabeledCapturesInDSL` into RegexBuilderTests This should allow the test to take advantage of availability --- .../RegexBuilderTests.swift | 113 ------------------ Tests/RegexBuilderTests/RegexDSLTests.swift | 113 ++++++++++++++++++ 2 files changed, 113 insertions(+), 113 deletions(-) diff --git a/Tests/DocumentationTests/RegexBuilderTests.swift b/Tests/DocumentationTests/RegexBuilderTests.swift index 0f1813e40..d0ae36e01 100644 --- a/Tests/DocumentationTests/RegexBuilderTests.swift +++ b/Tests/DocumentationTests/RegexBuilderTests.swift @@ -204,117 +204,4 @@ extension RegexBuilderTests { XCTAssertEqual(matches.count, 1) XCTAssertEqual(matches[0].1, 121.54) } - - func testLabeledCapturesInDSL() throws { - let oneNumericField = "abc:123:def" - let twoNumericFields = 
"abc:123:def:456:ghi" - - let regexWithCapture = #/:(\d+):/# - let regexWithLabeledCapture = #/:(?\d+):/# - let regexWithNonCapture = #/:(?:\d+):/# - - do { - // The output type of a regex with unlabeled captures is concatenated. - let dslWithCapture = Regex { - OneOrMore(.word) - regexWithCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) - - let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) - XCTAssertEqual(output.0, oneNumericField[...]) - XCTAssertEqual(output.1, "123") - } - do { - // The output type of a regex with a labeled capture is dropped. - let dslWithLabeledCapture = Regex { - OneOrMore(.word) - regexWithLabeledCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithLabeledCapture).self == Regex.self) - - let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) - XCTAssertEqual(match.output, oneNumericField[...]) - - // We can recover the ignored captures by converting to `AnyRegexOutput`. - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 2) - XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) - XCTAssertEqual(anyOutput[1].substring, "123") - XCTAssertEqual(anyOutput["number"]?.substring, "123") - } - do { - let coalescingWithCapture = Regex { - "e" as Character - #/\u{301}(\d*)/# - } - XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) - XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "é")) - - let coalescingWithLabeledCapture = Regex { - "e" as Character - #/\u{301}(?\d*)/# - } - XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) - XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) - } - do { - // Only the output type of a regex with a labeled capture is dropped, - // outputs of other regexes in the same DSL are concatenated. 
- let dslWithBothCaptures = Regex { - OneOrMore(.word) - regexWithCapture - OneOrMore(.word) - regexWithLabeledCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) - - let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) - XCTAssertEqual(match.output.0, twoNumericFields[...]) - XCTAssertEqual(match.output.1, "123") - - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 3) - XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) - XCTAssertEqual(anyOutput[1].substring, "123") - XCTAssertEqual(anyOutput[2].substring, "456") - } - do { - // The output type of a regex with too many captures is dropped. - // "Too many" means the left and right output types would add up to >= 10. - let alpha = "AAA:abcdefghijklm:123:456:" - let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# - let dslWithTooManyCaptures = Regex { - Capture(OneOrMore(.word)) - ":" - regexWithTooManyCaptures - ":" - TryCapture(OneOrMore(.word)) { Int($0) } - #/:(\d+):/# - } - XCTAssert(type(of: dslWithTooManyCaptures).self - == Regex<(Substring, Substring, Int, Substring)>.self) - - let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) - XCTAssertEqual(match.output.0, alpha[...]) - XCTAssertEqual(match.output.1, "AAA") - XCTAssertEqual(match.output.2, 123) - XCTAssertEqual(match.output.3, "456") - - // All captures groups are available through `AnyRegexOutput`. - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 17) - XCTAssertEqual(anyOutput[0].substring, alpha[...]) - XCTAssertEqual(anyOutput[1].substring, "AAA") - for (offset, letter) in "abcdefghijklm".enumerated() { - XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) - } - XCTAssertEqual(anyOutput[15].substring, "123") - XCTAssertEqual(anyOutput[15].value as? 
Int, 123) - XCTAssertEqual(anyOutput[16].substring, "456") - } - } } diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 7882a2d97..194dddfe9 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -1756,6 +1756,119 @@ class RegexDSLTests: XCTestCase { XCTFail("Expected to match capture") } } + + func testLabeledCapturesInDSL() throws { + let oneNumericField = "abc:123:def" + let twoNumericFields = "abc:123:def:456:ghi" + + let regexWithCapture = #/:(\d+):/# + let regexWithLabeledCapture = #/:(?<number>\d+):/# + let regexWithNonCapture = #/:(?:\d+):/# + + do { + // The output type of a regex with unlabeled captures is concatenated. + let dslWithCapture = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) + + let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) + XCTAssertEqual(output.0, oneNumericField[...]) + XCTAssertEqual(output.1, "123") + } + do { + // The output type of a regex with a labeled capture is dropped. + let dslWithLabeledCapture = Regex { + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithLabeledCapture).self == Regex<Substring>.self) + + let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) + XCTAssertEqual(match.output, oneNumericField[...]) + + // We can recover the ignored captures by converting to `AnyRegexOutput`.
+ let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 2) + XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput["number"]?.substring, "123") + } + do { + let coalescingWithCapture = Regex { + "e" as Character + #/\u{301}(\d*)/# + } + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "é")) + + let coalescingWithLabeledCapture = Regex { + "e" as Character + #/\u{301}(?\d*)/# + } + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) + } + do { + // Only the output type of a regex with a labeled capture is dropped, + // outputs of other regexes in the same DSL are concatenated. + let dslWithBothCaptures = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) + } + XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) + + let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) + XCTAssertEqual(match.output.0, twoNumericFields[...]) + XCTAssertEqual(match.output.1, "123") + + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 3) + XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput[2].substring, "456") + } + do { + // The output type of a regex with too many captures is dropped. + // "Too many" means the left and right output types would add up to >= 10. 
+ let alpha = "AAA:abcdefghijklm:123:456:" + let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# + let dslWithTooManyCaptures = Regex { + Capture(OneOrMore(.word)) + ":" + regexWithTooManyCaptures + ":" + TryCapture(OneOrMore(.word)) { Int($0) } + #/:(\d+):/# + } + XCTAssert(type(of: dslWithTooManyCaptures).self + == Regex<(Substring, Substring, Int, Substring)>.self) + + let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) + XCTAssertEqual(match.output.0, alpha[...]) + XCTAssertEqual(match.output.1, "AAA") + XCTAssertEqual(match.output.2, 123) + XCTAssertEqual(match.output.3, "456") + + // All captures groups are available through `AnyRegexOutput`. + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 17) + XCTAssertEqual(anyOutput[0].substring, alpha[...]) + XCTAssertEqual(anyOutput[1].substring, "AAA") + for (offset, letter) in "abcdefghijklm".enumerated() { + XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) + } + XCTAssertEqual(anyOutput[15].substring, "123") + XCTAssertEqual(anyOutput[15].value as? Int, 123) + XCTAssertEqual(anyOutput[16].substring, "456") + } + } } extension Unicode.Scalar { From f2f011227eb33a4fbd3fe1bd71c4830ce4e27ef1 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Feb 2023 09:17:43 -0800 Subject: [PATCH 7/9] Split labeled capture tests into individual methods --- Tests/RegexBuilderTests/RegexDSLTests.swift | 34 +++++++++++++-------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 194dddfe9..7e4e6f3db 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -58,7 +58,7 @@ class RegexDSLTests: XCTestCase { let regex = Regex { "a" Capture(Character("b")) // Character - TryCapture("1") { Int($0) } // Int + TryCapture { "1" } transform: { Int($0) } // Int } // Assert the inferred capture type. 
let _: (Substring, Substring, Int).Type = type(of: regex).RegexOutput.self @@ -1756,16 +1756,21 @@ class RegexDSLTests: XCTestCase { XCTFail("Expected to match capture") } } +} - func testLabeledCapturesInDSL() throws { - let oneNumericField = "abc:123:def" - let twoNumericFields = "abc:123:def:456:ghi" +fileprivate let oneNumericField = "abc:123:def" +fileprivate let twoNumericFields = "abc:123:def:456:ghi" - let regexWithCapture = #/:(\d+):/# - let regexWithLabeledCapture = #/:(?<number>\d+):/# - let regexWithNonCapture = #/:(?:\d+):/# +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithCapture = #/:(\d+):/# +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithLabeledCapture = #/:(?<number>\d+):/# +@available(SwiftStdlib 5.7, *) +fileprivate let regexWithNonCapture = #/:(?:\d+):/# - do { +@available(SwiftStdlib 5.7, *) +extension RegexDSLTests { + func testLabeledCaptures_regularCapture() throws { // The output type of a regex with unlabeled captures is concatenated. let dslWithCapture = Regex { OneOrMore(.word) @@ -1778,7 +1783,8 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(output.0, oneNumericField[...]) XCTAssertEqual(output.1, "123") } - do { + + func testLabeledCaptures_labeledCapture() throws { // The output type of a regex with a labeled capture is dropped.
let dslWithLabeledCapture = Regex { OneOrMore(.word) @@ -1797,7 +1803,8 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(anyOutput[1].substring, "123") XCTAssertEqual(anyOutput["number"]?.substring, "123") } - do { + + func testLabeledCaptures_coalescingWithCapture() throws { let coalescingWithCapture = Regex { "e" as Character #/\u{301}(\d*)/# @@ -1812,7 +1819,8 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) } - do { + + func testLabeledCaptures_bothCapture() throws { // Only the output type of a regex with a labeled capture is dropped, // outputs of other regexes in the same DSL are concatenated. let dslWithBothCaptures = Regex { @@ -1834,7 +1842,8 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(anyOutput[1].substring, "123") XCTAssertEqual(anyOutput[2].substring, "456") } - do { + + func testLabeledCaptures_tooManyCapture() throws { // The output type of a regex with too many captures is dropped. // "Too many" means the left and right output types would add up to >= 10. 
let alpha = "AAA:abcdefghijklm:123:456:" @@ -1869,7 +1878,6 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(anyOutput[16].substring, "456") } } -} extension Unicode.Scalar { // Convert a hexadecimal string to a scalar From 8c63128eed455716b6b0d60653efeab4c9954a1b Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Feb 2023 10:21:57 -0800 Subject: [PATCH 8/9] Skip conditional wrapping --- Sources/_StringProcessing/Utility/RegexFactory.swift | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index 584772921..cc3b73a3a 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -26,9 +26,10 @@ public struct _RegexFactory { _ child: some RegexComponent ) -> Regex { // Don't wrap `child` again if it's a leaf node. - child.regex.root.hasChildNodes - ? .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) - : .init(node: child.regex.root) +// child.regex.root.hasChildNodes +// ? + .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) +// : .init(node: child.regex.root) } @available(SwiftStdlib 5.7, *) From 38e9cefb8ede683bf5ee2e041ea8a9ab11af9551 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Thu, 9 Feb 2023 10:44:56 -0800 Subject: [PATCH 9/9] Skip labeled capture tests on older platforms --- .../Utility/RegexFactory.swift | 7 +- Tests/RegexBuilderTests/RegexDSLTests.swift | 452 +++++++++--------- 2 files changed, 235 insertions(+), 224 deletions(-) diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index cc3b73a3a..584772921 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -26,10 +26,9 @@ public struct _RegexFactory { _ child: some RegexComponent ) -> Regex { // Don't wrap `child` again if it's a leaf node. 
-// child.regex.root.hasChildNodes -// ? - .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) -// : .init(node: child.regex.root) + child.regex.root.hasChildNodes + ? .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) + : .init(node: child.regex.root) } @available(SwiftStdlib 5.7, *) diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift index 7e4e6f3db..0dd050357 100644 --- a/Tests/RegexBuilderTests/RegexDSLTests.swift +++ b/Tests/RegexBuilderTests/RegexDSLTests.swift @@ -53,7 +53,7 @@ class RegexDSLTests: XCTestCase { file: file, line: line) } } - + func testSimpleStrings() throws { let regex = Regex { "a" @@ -65,19 +65,19 @@ class RegexDSLTests: XCTestCase { let maybeMatch = "ab1".wholeMatch(of: regex) let match = try XCTUnwrap(maybeMatch) XCTAssertTrue(match.output == ("ab1", "b", 1)) - + let substring = "ab1"[...] let substringMatch = try XCTUnwrap(substring.wholeMatch(of: regex)) XCTAssertTrue(match.output == substringMatch.output) } - + let allNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n\u{85}\u{2028}\u{2029}" let asciiNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n" - + func testCharacterClasses() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + try _testDSLCaptures( ("a c", ("a c", " ", "c")), matchType: (Substring, Substring, Substring).self, ==) @@ -95,7 +95,7 @@ class RegexDSLTests: XCTestCase { OneOrMore { CharacterClass("a"..."z", .digit) } - + // Second group OneOrMore { ChoiceOf { @@ -104,7 +104,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("abc1def2", ("abc1def2", "abc1")), matchType: (Substring, Substring).self, ==) @@ -113,12 +113,12 @@ class RegexDSLTests: XCTestCase { OneOrMore(.digit.inverted) ("a"..."z").inverted } - + OneOrMore { CharacterClass.whitespace.inverted } } - + // `.newlineSequence` and `.verticalWhitespace` match the same set of // newlines in grapheme semantic mode, and scalar mode when applied with // OneOrMore. @@ -147,7 +147,7 @@ class RegexDSLTests: XCTestCase { } }.matchingSemantics(mode) } - + // Try with ASCII-only whitespace. try _testDSLCaptures( ("\n", ("\n", "\n")), @@ -174,7 +174,7 @@ class RegexDSLTests: XCTestCase { } } } - + // `.newlineSequence` in scalar mode may match a single `\r\n`. // `.verticalWhitespace` may not. for asciiOnly in [true, false] { @@ -225,7 +225,7 @@ class RegexDSLTests: XCTestCase { }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly) } } - + // Make sure horizontal whitespace does not match newlines or other // vertical whitespace. try _testDSLCaptures( @@ -238,7 +238,7 @@ class RegexDSLTests: XCTestCase { { OneOrMore(.horizontalWhitespace) } - + // Horizontal whitespace in ASCII mode. try _testDSLCaptures( (" \u{9} \t ", " \u{9} \t "), @@ -250,11 +250,11 @@ class RegexDSLTests: XCTestCase { }.asciiOnlyWhitespace() } } - + func testCharacterClassOperations() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + try _testDSLCaptures( ("bcdefn1a", "bcdefn1a"), ("nbcdef1a", nil), // fails symmetric difference lookahead @@ -266,15 +266,15 @@ class RegexDSLTests: XCTestCase { let disallowedChars = CharacterClass.hexDigit .symmetricDifference("a"..."z") NegativeLookahead(disallowedChars) // No: 0-9 + g-z - + OneOrMore(("b"..."g").union("d"..."n")) // b-n CharacterClass.digit.subtracting("3"..."9") // 1, 2, non-ascii digits - + CharacterClass.hexDigit.intersection("a"..."z") // a-f } } - + func testAny() throws { // .any matches newlines regardless of matching options. for dotMatchesNewline in [true, false] { @@ -287,7 +287,7 @@ class RegexDSLTests: XCTestCase { }.dotMatchesNewlines(dotMatchesNewline) } } - + // `.anyGraphemeCluster` is the same as `.any` in grapheme mode. for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { try _testDSLCaptures( @@ -302,7 +302,7 @@ class RegexDSLTests: XCTestCase { One(.anyGraphemeCluster) }.matchingSemantics(mode) } - + // Like `.any` it also always matches newlines. for dotMatchesNewline in [true, false] { try _testDSLCaptures( @@ -316,7 +316,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testAnyNonNewline() throws { // `.anyNonNewline` is `.` without single-line mode. 
for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { @@ -333,7 +333,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.anyNonNewline) }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline) } - + try _testDSLCaptures( ("abcdef", nil), ("abcdef\n", nil), @@ -346,7 +346,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.anyNonNewline.inverted) }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline) } - + try _testDSLCaptures( ("abc", "abc"), ("abcd", nil), @@ -361,7 +361,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("\r\n", "\r\n"), matchType: Substring.self, ==) { CharacterClass.anyNonNewline.inverted @@ -373,12 +373,12 @@ class RegexDSLTests: XCTestCase { }.matchingSemantics(.unicodeScalar) } } - + func testMatchResultDotZeroWithoutCapture() throws { let match = try XCTUnwrap("aaa".wholeMatch { OneOrMore { "a" } }) XCTAssertEqual(match.0, "aaa") } - + func testAlternation() throws { do { let regex = ChoiceOf { @@ -447,7 +447,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil("aab".wholeMatch(of: regex)?.output) } } - + func testCombinators() throws { try _testDSLCaptures( ("aaaabccccdddkj", ("aaaabccccdddkj", "b", "cccc", "d", "k", nil, "j")), @@ -498,7 +498,7 @@ class RegexDSLTests: XCTestCase { .ignoresCase(true) .ignoresCase(false) } - + // An option on an outer component doesn't override an option set on an // inner component. try _testDSLCaptures( @@ -519,7 +519,7 @@ class RegexDSLTests: XCTestCase { } .ignoresCase(false) } - + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can't", "won't")), @@ -539,7 +539,7 @@ class RegexDSLTests: XCTestCase { OneOrMore(.any, .reluctant) "stop" } - + // FIXME: Re-enable this test try _testDSLCaptures( ("can't stop won't stop", ("can't stop won't stop", "can", "won")), @@ -600,7 +600,7 @@ class RegexDSLTests: XCTestCase { func testQuantificationBehavior() throws { // Must have new stdlib for character class ranges. 
guard ensureNewStdlib() else { return } - + // Eager by default try _testDSLCaptures( ("abc1def2", ("abc1def2", "2")), @@ -610,7 +610,7 @@ class RegexDSLTests: XCTestCase { Capture(.digit) ZeroOrMore(.any) } - + // Explicitly reluctant try _testDSLCaptures( ("abc1def2", ("abc1def2", "1")), @@ -701,7 +701,7 @@ class RegexDSLTests: XCTestCase { OneOrMore("a") }.repetitionBehavior(.possessive) } - + try _testDSLCaptures( ("abc1def2", "abc1def2"), matchType: Substring.self, ==) @@ -713,7 +713,7 @@ class RegexDSLTests: XCTestCase { CharacterClass.digit } } - + try _testDSLCaptures( ("abcdef2", ("abcdef2", "f")), ("2", ("2", nil)), @@ -727,7 +727,7 @@ class RegexDSLTests: XCTestCase { CharacterClass.digit } } - + try _testDSLCaptures( ("aaabbbcccdddeeefff", "aaabbbcccdddeeefff"), ("aaabbbcccccdddeeefff", "aaabbbcccccdddeeefff"), @@ -749,7 +749,7 @@ class RegexDSLTests: XCTestCase { Repeat(2...) { "e" } Repeat(0...) { "f" } } - + try _testDSLCaptures( ("", nil), ("a", nil), @@ -759,7 +759,7 @@ class RegexDSLTests: XCTestCase { { Repeat(2...) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -769,7 +769,7 @@ class RegexDSLTests: XCTestCase { { Repeat(...2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -779,7 +779,7 @@ class RegexDSLTests: XCTestCase { { Repeat(..<2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -788,7 +788,7 @@ class RegexDSLTests: XCTestCase { { Repeat(...0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -797,7 +797,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ... 0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -806,7 +806,7 @@ class RegexDSLTests: XCTestCase { { Repeat(count: 0) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -815,7 +815,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ... 1) { "a" } } - + try _testDSLCaptures( ("", nil), ("a", "a"), @@ -825,7 +825,7 @@ class RegexDSLTests: XCTestCase { { Repeat(1 ... 
2) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", nil), @@ -834,7 +834,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ..< 1) { "a" } } - + try _testDSLCaptures( ("", ""), ("a", "a"), @@ -843,7 +843,7 @@ class RegexDSLTests: XCTestCase { { Repeat(0 ..< 2) { "a" } } - + try _testDSLCaptures( ("", nil), ("a", "a"), @@ -853,7 +853,7 @@ class RegexDSLTests: XCTestCase { { Repeat(1 ..< 3) { "a" } } - + let octoDecimalRegex: Regex<(Substring, Int?)> = Regex { let charClass = CharacterClass(.digit, "a"..."h")//.ignoringCase() Capture { @@ -908,7 +908,7 @@ class RegexDSLTests: XCTestCase { UnicodeScalar("e") Anchor.textSegmentBoundary } - + try _testDSLCaptures( ("aaaaa1", "aaaaa1"), ("aaaaa2", nil), @@ -935,7 +935,7 @@ class RegexDSLTests: XCTestCase { Anchor.endOfSubject }.anchorsMatchLineEndings() } - + try _testDSLCaptures( ("\naaa", "\naaa"), ("aaa\n", "aaa\n"), @@ -950,7 +950,7 @@ class RegexDSLTests: XCTestCase { Optionally { "\n" } } } - + // startOfLine/endOfLine apply regardless of mode. 
for matchLineEndings in [true, false] { for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] { @@ -959,41 +959,41 @@ class RegexDSLTests: XCTestCase { Repeat("a", count: 3) Anchor.endOfLine }.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode) - + XCTAssertNotNil(try r.firstMatch(in: "\naaa")) XCTAssertNotNil(try r.firstMatch(in: "aaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\naaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n")) XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n")) XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n")) - + XCTAssertNil(try r.firstMatch(in: "\nbaaa\n")) XCTAssertNil(try r.firstMatch(in: "\naaab\n")) } } } - + func testNestedGroups() throws { return; - + // TODO: clarify what the nesting story is - + /* - try _testDSLCaptures( - ("aaaabccccddd", ("aaaabccccddd", [("b", "cccc", ["d", "d", "d"])])), - matchType: (Substring, [(Substring, Substring, [Substring])]).self, ==) - { - "a".+ - OneOrMore { - Capture(OneOrMore("b")) - Capture(ZeroOrMore("c")) - Capture("d").* - "e".? - } - } + try _testDSLCaptures( + ("aaaabccccddd", ("aaaabccccddd", [("b", "cccc", ["d", "d", "d"])])), + matchType: (Substring, [(Substring, Substring, [Substring])]).self, ==) + { + "a".+ + OneOrMore { + Capture(OneOrMore("b")) + Capture(ZeroOrMore("c")) + Capture("d").* + "e".? 
+ } + } */ } - + func testCaptureTransform() throws { try _testDSLCaptures( ("aaaa1", ("aaaa1", "aaa")), @@ -1016,7 +1016,7 @@ class RegexDSLTests: XCTestCase { One(.digit) } } - + func testCapturelessQuantification() throws { // This test is to make sure that a captureless quantification, when used // straight out of the quantifier (without being wrapped in a builder), is @@ -1029,7 +1029,7 @@ class RegexDSLTests: XCTestCase { let match = try XCTUnwrap(input.wholeMatch(of: regex)?.output) XCTAssertTrue(match == input) } - + func testQuantificationWithTransformedCapture() throws { // This test is to make sure transformed capture type information is // correctly propagated from the DSL into the bytecode and that the engine @@ -1038,7 +1038,7 @@ class RegexDSLTests: XCTestCase { enum Word: Int32 { case apple case orange - + init?(_ string: Substring) { switch string { case "apple": self = .apple @@ -1063,7 +1063,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testNestedCaptureTypes() throws { let regex1 = Regex { OneOrMore("a") @@ -1073,8 +1073,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Substring).Type - = type(of: regex1).RegexOutput.self - + = type(of: regex1).RegexOutput.self + let regex2 = Regex { OneOrMore("a") Capture { @@ -1085,8 +1085,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Int?).Type - = type(of: regex2).RegexOutput.self - + = type(of: regex2).RegexOutput.self + let regex3 = Regex { OneOrMore("a") Capture { @@ -1098,8 +1098,8 @@ class RegexDSLTests: XCTestCase { } } let _: (Substring, Substring, Int, Double?).Type - = type(of: regex3).RegexOutput.self - + = type(of: regex3).RegexOutput.self + let regex4 = Regex { OneOrMore("a") Capture { @@ -1113,50 +1113,50 @@ class RegexDSLTests: XCTestCase { } let _: ( Substring, Substring, Substring, Substring, Substring?).Type - = type(of: regex4).RegexOutput.self + = type(of: regex4).RegexOutput.self } - + func testUnicodeScalarPostProcessing() throws 
{ let spaces = Regex { ZeroOrMore { One(.whitespace) } } - + let unicodeScalar = Regex { OneOrMore { One(.hexDigit) } spaces } - + let unicodeData = Regex { unicodeScalar Optionally { ".." unicodeScalar } - + ";" spaces - + Capture { OneOrMore(.word) } - + ZeroOrMore(.any) } - + // Assert the inferred capture type. let _: (Substring, Substring).Type = type(of: unicodeData).RegexOutput.self - + let unicodeLine = - "1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP" + "1BCA0..1BCA3 ; Control # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP" let match = try XCTUnwrap(unicodeLine.wholeMatch(of: unicodeData)) XCTAssertEqual(match.0, Substring(unicodeLine)) XCTAssertEqual(match.1, "Control") } - + func testGraphemeBreakData() throws { let line = """ A6F0..A6F1 ; Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS @@ -1192,7 +1192,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(upper, Unicode.Scalar(0xA6F1)) XCTAssertEqual(propertyString, "Extend") } - + let regexWithTryCapture = Regex { TryCapture { OneOrMore(.hexDigit) @@ -1227,10 +1227,10 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(upper, Unicode.Scalar(0xA6F1)) XCTAssertEqual(propertyString, "Extend") } - + do { let regexLiteral = try Regex( - #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#, + #"([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+).*"#, as: (Substring, Substring, Substring?, Substring).self) let maybeMatchResult = line.wholeMatch(of: regexLiteral) let matchResult = try XCTUnwrap(maybeMatchResult) @@ -1241,7 +1241,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(propertyString, "Extend") } } - + func testBackreference() throws { try _testDSLCaptures( ("abc#41#42abcabcabc", ("abc#41#42abcabcabc", "abc", 42, "abc", nil)), @@ -1267,7 +1267,7 @@ class RegexDSLTests: XCTestCase { Capture(a) } } - + // Match result referencing a `Reference`. 
do { let a = Reference(Substring.self) @@ -1295,7 +1295,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(result[a], "abc") XCTAssertEqual(result[b], 42) } - + do { let key = Reference(Substring.self) let value = Reference(Int.self) @@ -1313,15 +1313,15 @@ class RegexDSLTests: XCTestCase { } transform: { Int($0)! } } } - + let result1 = try XCTUnwrap("age:123".wholeMatch(of: regex)) XCTAssertEqual(result1[key], "age") XCTAssertEqual(result1[value], 123) - + let result2 = try XCTUnwrap(":567".wholeMatch(of: regex)) XCTAssertEqual(result2[key], "") XCTAssertEqual(result2[value], 567) - + let result3 = try XCTUnwrap("status:".wholeMatch(of: regex)) XCTAssertEqual(result3[key], "status") // Traps: @@ -1352,7 +1352,7 @@ class RegexDSLTests: XCTestCase { } } } - + // Post-hoc captured reference w/ attempted match before capture // #"(?:\w\1|(\w):)+"# // @@ -1401,7 +1401,7 @@ class RegexDSLTests: XCTestCase { } } } - + func testScalarMatching() throws { // RegexBuilder provides a RegexComponent conformance for UnicodeScalar. In // grapheme cluster mode, it should only match entire graphemes. 
It may @@ -1410,7 +1410,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil("a\u{301}".firstMatch(of: "a" as UnicodeScalar)) XCTAssertNotNil("a\u{301}".firstMatch( of: ("a" as UnicodeScalar).regex.matchingSemantics(.unicodeScalar))) - + let r1 = Regex { "a" as UnicodeScalar } @@ -1418,7 +1418,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r1.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}") ) - + let r2 = Regex { CharacterClass.anyOf(["a" as UnicodeScalar, "👍"]) } @@ -1426,7 +1426,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r2.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}") ) - + let r3 = Regex { "👨" as UnicodeScalar "\u{200D}" as UnicodeScalar @@ -1440,7 +1440,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r3.wholeMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦")) - + let r4 = Regex { "é" as UnicodeScalar } XCTAssertNotNil( try r4.firstMatch(in: "e\u{301}") @@ -1448,28 +1448,28 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil( try r4.firstMatch(in: "é") ) - + let r5 = Regex { "e" "\u{301}" as UnicodeScalar } XCTAssertNotNil(try r5.firstMatch(in: "e\u{301}")) XCTAssertNotNil(try r5.firstMatch(in: "é")) - + let r6 = Regex { "abcde" "\u{301}" } XCTAssertNotNil(try r6.firstMatch(in: "abcde\u{301}")) XCTAssertNotNil(try r6.firstMatch(in: "abcdé")) - + let r7 = Regex { "e" as Character "\u{301}" as Character } XCTAssertNotNil(try r7.firstMatch(in: "e\u{301}")) XCTAssertNotNil(try r7.firstMatch(in: "é")) - + // You can't match a partial grapheme in grapheme semantic mode. 
let r8 = Regex { "👨" as UnicodeScalar @@ -1482,7 +1482,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNil(try r8.wholeMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r8.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNil(try r8.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦")) - + // Scalar coalescing occurs across nested concatenations and literals. let r9 = Regex { Regex { @@ -1504,7 +1504,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r9.wholeMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦")) - + let r10 = Regex { "👨" as UnicodeScalar try! Regex(#"\u{200D 1F468 200D 1F467}"#) @@ -1516,7 +1516,7 @@ class RegexDSLTests: XCTestCase { XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦")) XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦")) } - + struct SemanticVersion: Equatable { var major: Int var minor: Int @@ -1543,11 +1543,11 @@ class RegexDSLTests: XCTestCase { Capture(OneOrMore(.word)) } } - + guard let match = input[index..) throws -> (upperBound: String.Index, output: Void)? { print("Matching '\(label)'", to: &Self.traceOutput) print(input, to: &Self.traceOutput) @@ -1612,7 +1612,7 @@ class RegexDSLTests: XCTestCase { """) } - + func testRegexComponentBuilderResultType() { // Test that the user can declare a closure or computed property marked with // `@RegexComponentBuilder` with `Regex` as the result type. @@ -1655,7 +1655,7 @@ class RegexDSLTests: XCTestCase { XCTAssertEqual(try replace("{bar}"), "foo") } - + func testOptionalNesting() throws { try _testDSLCaptures( ("a", ("a", nil)), @@ -1666,7 +1666,7 @@ class RegexDSLTests: XCTestCase { { try! 
Regex("(?:a|(b)*)?", as: (Substring, Substring?).self) } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1678,7 +1678,7 @@ class RegexDSLTests: XCTestCase { try! Regex("a|(b)*", as: (Substring, Substring?).self) } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1693,7 +1693,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1706,7 +1706,7 @@ class RegexDSLTests: XCTestCase { try! Regex("(b)*", as: (Substring, Substring?).self) } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1721,7 +1721,7 @@ class RegexDSLTests: XCTestCase { } } } - + try _testDSLCaptures( ("a", ("a", nil)), ("", ("", nil)), @@ -1738,7 +1738,7 @@ class RegexDSLTests: XCTestCase { } } } - + let r = Regex { Optionally { Optionally { @@ -1757,10 +1757,10 @@ class RegexDSLTests: XCTestCase { } } } - + fileprivate let oneNumericField = "abc:123:def" fileprivate let twoNumericFields = "abc:123:def:456:ghi" - + @available(SwiftStdlib 5.7, *) fileprivate let regexWithCapture = #/:(\d+):/# @available(SwiftStdlib 5.7, *) @@ -1771,113 +1771,125 @@ fileprivate let regexWithNonCapture = #/:(?:\d+):/# @available(SwiftStdlib 5.7, *) extension RegexDSLTests { func testLabeledCaptures_regularCapture() throws { - // The output type of a regex with unlabeled captures is concatenated. - let dslWithCapture = Regex { - OneOrMore(.word) - regexWithCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) - - let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) - XCTAssertEqual(output.0, oneNumericField[...]) - XCTAssertEqual(output.1, "123") + // The output type of a regex with unlabeled captures is concatenated. 
+ let dslWithCapture = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) } + XCTAssert(type(of: dslWithCapture).self == Regex<(Substring, Substring)>.self) + + let output = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithCapture)?.output) + XCTAssertEqual(output.0, oneNumericField[...]) + XCTAssertEqual(output.1, "123") + } func testLabeledCaptures_labeledCapture() throws { - // The output type of a regex with a labeled capture is dropped. - let dslWithLabeledCapture = Regex { - OneOrMore(.word) - regexWithLabeledCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithLabeledCapture).self == Regex.self) - - let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) - XCTAssertEqual(match.output, oneNumericField[...]) - - // We can recover the ignored captures by converting to `AnyRegexOutput`. - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 2) - XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) - XCTAssertEqual(anyOutput[1].substring, "123") - XCTAssertEqual(anyOutput["number"]?.substring, "123") + guard #available(macOS 13, *) else { + XCTSkip("Fix only exists on macOS 13") + return + } + // The output type of a regex with a labeled capture is dropped. + let dslWithLabeledCapture = Regex { + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) } + XCTAssert(type(of: dslWithLabeledCapture).self == Regex.self) + + let match = try XCTUnwrap(oneNumericField.wholeMatch(of: dslWithLabeledCapture)) + XCTAssertEqual(match.output, oneNumericField[...]) + + // We can recover the ignored captures by converting to `AnyRegexOutput`. 
+ let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 2) + XCTAssertEqual(anyOutput[0].substring, oneNumericField[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput["number"]?.substring, "123") + } func testLabeledCaptures_coalescingWithCapture() throws { - let coalescingWithCapture = Regex { - "e" as Character - #/\u{301}(\d*)/# - } - XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) - XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "é")) - - let coalescingWithLabeledCapture = Regex { - "e" as Character - #/\u{301}(?\d*)/# - } - XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) - XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) + let coalescingWithCapture = Regex { + "e" as Character + #/\u{301}(\d*)/# } + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithCapture.firstMatch(in: "é")) + + let coalescingWithLabeledCapture = Regex { + "e" as Character + #/\u{301}(?\d*)/# + } + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "e\u{301}")) + XCTAssertNotNil(try coalescingWithLabeledCapture.firstMatch(in: "é")) + } func testLabeledCaptures_bothCapture() throws { - // Only the output type of a regex with a labeled capture is dropped, - // outputs of other regexes in the same DSL are concatenated. 
- let dslWithBothCaptures = Regex { - OneOrMore(.word) - regexWithCapture - OneOrMore(.word) - regexWithLabeledCapture - OneOrMore(.word) - } - XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) - - let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) - XCTAssertEqual(match.output.0, twoNumericFields[...]) - XCTAssertEqual(match.output.1, "123") - - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 3) - XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) - XCTAssertEqual(anyOutput[1].substring, "123") - XCTAssertEqual(anyOutput[2].substring, "456") + guard #available(macOS 13, *) else { + XCTSkip("Fix only exists on macOS 13") + return + } + // Only the output type of a regex with a labeled capture is dropped, + // outputs of other regexes in the same DSL are concatenated. + let dslWithBothCaptures = Regex { + OneOrMore(.word) + regexWithCapture + OneOrMore(.word) + regexWithLabeledCapture + OneOrMore(.word) } + XCTAssert(type(of: dslWithBothCaptures).self == Regex<(Substring, Substring)>.self) + + let match = try XCTUnwrap(twoNumericFields.wholeMatch(of: dslWithBothCaptures)) + XCTAssertEqual(match.output.0, twoNumericFields[...]) + XCTAssertEqual(match.output.1, "123") + + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 3) + XCTAssertEqual(anyOutput[0].substring, twoNumericFields[...]) + XCTAssertEqual(anyOutput[1].substring, "123") + XCTAssertEqual(anyOutput[2].substring, "456") + } func testLabeledCaptures_tooManyCapture() throws { - // The output type of a regex with too many captures is dropped. - // "Too many" means the left and right output types would add up to >= 10. 
- let alpha = "AAA:abcdefghijklm:123:456:" - let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# - let dslWithTooManyCaptures = Regex { - Capture(OneOrMore(.word)) - ":" - regexWithTooManyCaptures - ":" - TryCapture(OneOrMore(.word)) { Int($0) } - #/:(\d+):/# - } - XCTAssert(type(of: dslWithTooManyCaptures).self - == Regex<(Substring, Substring, Int, Substring)>.self) - - let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) - XCTAssertEqual(match.output.0, alpha[...]) - XCTAssertEqual(match.output.1, "AAA") - XCTAssertEqual(match.output.2, 123) - XCTAssertEqual(match.output.3, "456") - - // All captures groups are available through `AnyRegexOutput`. - let anyOutput = AnyRegexOutput(match) - XCTAssertEqual(anyOutput.count, 17) - XCTAssertEqual(anyOutput[0].substring, alpha[...]) - XCTAssertEqual(anyOutput[1].substring, "AAA") - for (offset, letter) in "abcdefghijklm".enumerated() { - XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) - } - XCTAssertEqual(anyOutput[15].substring, "123") - XCTAssertEqual(anyOutput[15].value as? Int, 123) - XCTAssertEqual(anyOutput[16].substring, "456") + guard #available(macOS 13, *) else { + XCTSkip("Fix only exists on macOS 13") + return + } + // The output type of a regex with too many captures is dropped. + // "Too many" means the left and right output types would add up to >= 10. 
+ let alpha = "AAA:abcdefghijklm:123:456:" + let regexWithTooManyCaptures = #/(a)(b)(c)(d)(e)(f)(g)(h)(i)(j)(k)(l)(m)/# + let dslWithTooManyCaptures = Regex { + Capture(OneOrMore(.word)) + ":" + regexWithTooManyCaptures + ":" + TryCapture(OneOrMore(.word)) { Int($0) } + #/:(\d+):/# } + XCTAssert(type(of: dslWithTooManyCaptures).self + == Regex<(Substring, Substring, Int, Substring)>.self) + + let match = try XCTUnwrap(alpha.wholeMatch(of: dslWithTooManyCaptures)) + XCTAssertEqual(match.output.0, alpha[...]) + XCTAssertEqual(match.output.1, "AAA") + XCTAssertEqual(match.output.2, 123) + XCTAssertEqual(match.output.3, "456") + + // All captures groups are available through `AnyRegexOutput`. + let anyOutput = AnyRegexOutput(match) + XCTAssertEqual(anyOutput.count, 17) + XCTAssertEqual(anyOutput[0].substring, alpha[...]) + XCTAssertEqual(anyOutput[1].substring, "AAA") + for (offset, letter) in "abcdefghijklm".enumerated() { + XCTAssertEqual(anyOutput[offset + 2].substring, String(letter)[...]) + } + XCTAssertEqual(anyOutput[15].substring, "123") + XCTAssertEqual(anyOutput[15].value as? Int, 123) + XCTAssertEqual(anyOutput[16].substring, "456") } +} extension Unicode.Scalar { // Convert a hexadecimal string to a scalar