Skip to content

[swift/main] Integrate latest changes #620

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,6 @@ fastlane/test_output
# https://github.com/johnno1962/injectionforxcode

iOSInjectionProject/

# DocC build folder
*.docc-build
8 changes: 8 additions & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ let package = Package(
swiftSettings: [
.unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
]),
.testTarget(
name: "DocumentationTests",
dependencies: ["_StringProcessing", "RegexBuilder"],
swiftSettings: [
availabilityDefinition,
.unsafeFlags(["-enable-bare-slash-regex"]),
]),

// FIXME: Disabled due to rdar://94763190.
// .testTarget(
// name: "Prototypes",
Expand Down
6 changes: 3 additions & 3 deletions Sources/RegexBuilder/Anchor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ extension Anchor {
}

/// An anchor that matches at the end of the input string or at the end of
/// the line immediately before the the end of the string.
/// the line immediately before the end of the string.
///
/// This anchor is equivalent to `\Z` in regex syntax.
public static var endOfSubjectBeforeNewline: Anchor {
Expand Down Expand Up @@ -147,7 +147,7 @@ extension Anchor {
///
/// Word boundaries are identified using the Unicode default word boundary
/// algorithm by default. To specify a different word boundary algorithm,
/// see the `RegexComponent.wordBoundaryKind(_:)` method.
/// use the `wordBoundaryKind(_:)` method.
///
/// This anchor is equivalent to `\b` in regex syntax.
public static var wordBoundary: Anchor {
Expand All @@ -157,7 +157,7 @@ extension Anchor {
/// The inverse of this anchor, which matches at every position that this
/// anchor does not.
///
/// For the `wordBoundary` and `textSegmentBoundary` anchors, the inverted
/// For the ``wordBoundary`` and ``textSegmentBoundary`` anchors, the inverted
/// version corresponds to `\B` and `\Y`, respectively.
public var inverted: Anchor {
var result = self
Expand Down
9 changes: 9 additions & 0 deletions Sources/RegexBuilder/Builder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@

@_spi(RegexBuilder) import _StringProcessing

/// A custom parameter attribute that constructs regular expressions from
/// closures.
///
/// You typically see `RegexComponentBuilder` as a parameter attribute for
/// `Regex`- or `RegexComponent`-producing closure parameters, allowing those
/// closures to combine multiple regular expression components. Type
/// initializers and string algorithm methods in the RegexBuilder framework
/// include a builder closure parameter, so that you can use regular expression
/// components together.
@available(SwiftStdlib 5.7, *)
@resultBuilder
public enum RegexComponentBuilder {
Expand Down
102 changes: 94 additions & 8 deletions Sources/RegexBuilder/CharacterClass.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
@_implementationOnly import _RegexParser
@_spi(RegexBuilder) import _StringProcessing

/// A class of characters that match in a regex.
///
/// A character class can represent individual characters, a group of
/// characters, the set of characters that match some set of criteria, or
/// a set algebraic combination of all of the above.
@available(SwiftStdlib 5.7, *)
public struct CharacterClass {
internal var ccc: DSLTree.CustomCharacterClass
Expand Down Expand Up @@ -42,6 +47,20 @@ extension CharacterClass: RegexComponent {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// A character class that matches any character that does not match this
/// character class.
///
/// For example, you can use the `inverted` property to create a character
/// class that excludes a specific group of characters:
///
/// let validCharacters = CharacterClass("a"..."z", .anyOf("-_"))
/// let invalidCharacters = validCharacters.inverted
///
/// let username = "user123"
/// if username.contains(invalidCharacters) {
/// print("Invalid username: '\(username)'")
/// }
/// // Prints "Invalid username: 'user123'"
public var inverted: CharacterClass {
if let inv = builtin?.inverted {
return CharacterClass(builtin: inv)
Expand All @@ -53,26 +72,50 @@ extension CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// A character class that matches any element.
///
/// This character class is unaffected by the `dotMatchesNewlines()` method.
/// To match any character that isn't a newline, see
/// ``anyNonNewline``.
///
/// This character class is equivalent to the regex syntax "dot"
/// metacharacter in single-line mode: `(?s:.)`.
public static var any: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
}

/// A character class that matches any element that isn't a newline.
///
/// This character class is unaffected by the `dotMatchesNewlines()` method.
/// To match any character, including newlines, see ``any``.
///
/// This character class is equivalent to the regex syntax "dot"
/// metacharacter with single-line mode disabled: `(?-s:.)`.
public static var anyNonNewline: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
}

/// A character class that matches any single `Character`, or extended
/// grapheme cluster, regardless of the current semantic level.
///
/// This character class is equivalent to `\X` in regex syntax.
public static var anyGraphemeCluster: CharacterClass {
.init(builtin: .anyGrapheme)
}

public static var whitespace: CharacterClass {
.init(builtin: .whitespace)
}

/// A character class that matches any digit.
///
/// This character class is equivalent to `\d` in regex syntax.
public static var digit: CharacterClass {
.init(builtin: .digit)
}

/// A character class that matches any hexadecimal digit.
///
/// `hexDigit` matches the ASCII characters `0` through `9`, and upper- or
/// lowercase `a` through `f`. The corresponding characters in the "Halfwidth
/// and Fullwidth Forms" Unicode block are not matched by this character
/// class.
public static var hexDigit: CharacterClass {
.init(DSLTree.CustomCharacterClass(members: [
.range(.char("A"), .char("F")),
Expand All @@ -81,27 +124,56 @@ extension RegexComponent where Self == CharacterClass {
]))
}

/// A character class that matches any element that is a "word character".
///
/// This character class is equivalent to `\w` in regex syntax.
public static var word: CharacterClass {
.init(builtin: .word)
}

/// A character class that matches any element that is classified as
/// whitespace.
///
/// This character class is equivalent to `\s` in regex syntax.
public static var whitespace: CharacterClass {
.init(builtin: .whitespace)
}

/// A character class that matches any element that is classified as
/// horizontal whitespace.
///
/// This character class is equivalent to `\h` in regex syntax.
public static var horizontalWhitespace: CharacterClass {
.init(builtin: .horizontalWhitespace)
}

/// A character class that matches any newline sequence.
///
/// This character class is equivalent to `\R` or `\n` in regex syntax.
public static var newlineSequence: CharacterClass {
.init(builtin: .newlineSequence)
}

/// A character class that matches any element that is classified as
/// vertical whitespace.
///
/// This character class is equivalent to `\v` in regex syntax.
public static var verticalWhitespace: CharacterClass {
.init(builtin: .verticalWhitespace)
}

public static var word: CharacterClass {
.init(builtin: .word)
}
}

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Returns a character class that matches any character in the given string
/// or sequence.
///
/// Calling this method with a group of characters is equivalent to listing
/// those characters in a custom character class in regex syntax. For example,
/// the two regexes in this example are equivalent:
///
/// let regex1 = /[abcd]+/
/// let regex2 = OneOrMore(.anyOf("abcd"))
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == Character
{
Expand All @@ -111,6 +183,9 @@ extension RegexComponent where Self == CharacterClass {

/// Returns a character class that matches any Unicode scalar in the given
/// sequence.
///
/// Calling this method with a group of Unicode scalars is equivalent to
/// listing them in a custom character class in regex syntax.
public static func anyOf<S: Sequence>(_ s: S) -> CharacterClass
where S.Element == UnicodeScalar
{
Expand All @@ -122,6 +197,11 @@ extension RegexComponent where Self == CharacterClass {
// Unicode properties
@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Returns a character class that matches any element with the given Unicode
/// general category.
///
/// For example, when passed `.uppercaseLetter`, this method is equivalent to
/// `/\p{Uppercase_Letter}/` or `/\p{Lu}/`.
public static func generalCategory(_ category: Unicode.GeneralCategory) -> CharacterClass {
return CharacterClass(.generalCategory(category))
}
Expand All @@ -148,6 +228,7 @@ public func ...(lhs: UnicodeScalar, rhs: UnicodeScalar) -> CharacterClass {

@available(SwiftStdlib 5.7, *)
extension RegexComponent where Self == CharacterClass {
/// Creates a character class that combines the given classes in a union.
public init(_ first: CharacterClass, _ rest: CharacterClass...) {
if rest.isEmpty {
self.init(first.ccc)
Expand All @@ -161,24 +242,29 @@ extension RegexComponent where Self == CharacterClass {

@available(SwiftStdlib 5.7, *)
extension CharacterClass {
/// Returns a character class from the union of this class and the given class.
public func union(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.custom(self.ccc),
.custom(other.ccc)]))
}

/// Returns a character class from the intersection of this class and the given class.
public func intersection(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.intersection(self.ccc, other.ccc)
]))
}

/// Returns a character class by subtracting the given class from this class.
public func subtracting(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.subtraction(self.ccc, other.ccc)
]))
}

/// Returns a character class matching elements in one or the other, but not both,
/// of this class and the given class.
public func symmetricDifference(_ other: CharacterClass) -> CharacterClass {
CharacterClass(.init(members: [
.symmetricDifference(self.ccc, other.ccc)
Expand Down
Loading