diff --git a/Package.swift b/Package.swift
index abc895813..c1e9bff37 100644
--- a/Package.swift
+++ b/Package.swift
@@ -75,15 +75,17 @@ let package = Package(
             name: "RegexBuilder",
             dependencies: ["_StringProcessing", "_RegexParser"],
             swiftSettings: publicStdlibSettings),
+        .target(name: "TestSupport",
+                swiftSettings: [availabilityDefinition]),
         .testTarget(
             name: "RegexTests",
-            dependencies: ["_StringProcessing"],
+            dependencies: ["_StringProcessing", "TestSupport"],
             swiftSettings: [
                 .unsafeFlags(["-Xfrontend", "-disable-availability-checking"]),
             ]),
         .testTarget(
             name: "RegexBuilderTests",
-            dependencies: ["_StringProcessing", "RegexBuilder"],
+            dependencies: ["_StringProcessing", "RegexBuilder", "TestSupport"],
             swiftSettings: [
                 .unsafeFlags(["-Xfrontend", "-disable-availability-checking"])
             ]),
diff --git a/Sources/RegexBuilder/Anchor.swift b/Sources/RegexBuilder/Anchor.swift
index 31a3e8a0d..cf1931577 100644
--- a/Sources/RegexBuilder/Anchor.swift
+++ b/Sources/RegexBuilder/Anchor.swift
@@ -37,16 +37,30 @@ public struct Anchor {
 
 @available(SwiftStdlib 5.7, *)
 extension Anchor: RegexComponent {
-  var baseAssertion: DSLTree._AST.AssertionKind {
+  var baseAssertion: DSLTree.Atom.Assertion {
     switch kind {
-    case .startOfSubject: return .startOfSubject(isInverted)
-    case .endOfSubjectBeforeNewline: return .endOfSubjectBeforeNewline(isInverted)
-    case .endOfSubject: return .endOfSubject(isInverted)
-    case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject(isInverted)
-    case .textSegmentBoundary: return .textSegmentBoundary(isInverted)
-    case .startOfLine: return .startOfLine(isInverted)
-    case .endOfLine: return .endOfLine(isInverted)
-    case .wordBoundary: return .wordBoundary(isInverted)
+    case .startOfSubject:
+      // FIXME: Inverted?
+      return .startOfSubject
+    case .endOfSubjectBeforeNewline:
+      // FIXME: Inverted?
+      return .endOfSubjectBeforeNewline
+    case .endOfSubject:
+      // FIXME: Inverted?
+      return .endOfSubject
+    case .firstMatchingPositionInSubject:
+      // FIXME: Inverted?
+      return .firstMatchingPositionInSubject
+    case .textSegmentBoundary:
+      return isInverted ? .notTextSegment : .textSegment
+    case .startOfLine:
+      // FIXME: Inverted?
+      return .startOfLine
+    case .endOfLine:
+      // FIXME: Inverted?
+      return .endOfLine
+    case .wordBoundary:
+      return isInverted ? .notWordBoundary : .wordBoundary
     }
   }
   
@@ -104,6 +118,12 @@ extension Anchor {
   ///
   /// This anchor is equivalent to `^` in regex syntax when the `m` option
   /// has been enabled or `anchorsMatchLineEndings(true)` has been called.
+  ///
+  /// For example, the following regexes are all equivalent:
+  ///
+  /// - `Regex { Anchor.startOfLine }`
+  /// - `/(?m)^/` or `/(?m:^)/`
+  /// - `/^/.anchorsMatchLineEndings(true)`
   public static var startOfLine: Anchor {
     Anchor(kind: .startOfLine)
   }
@@ -113,6 +133,12 @@ extension Anchor {
   ///
   /// This anchor is equivalent to `$` in regex syntax when the `m` option
   /// has been enabled or `anchorsMatchLineEndings(true)` has been called.
+  ///
+  /// For example, the following regexes are all equivalent:
+  ///
+  /// - `Regex { Anchor.endOfLine }`
+  /// - `/(?m)$/` or `/(?m:$)/`
+  /// - `/$/.anchorsMatchLineEndings(true)`
   public static var endOfLine: Anchor {
     Anchor(kind: .endOfLine)
   }
diff --git a/Sources/RegexBuilder/CharacterClass.swift b/Sources/RegexBuilder/CharacterClass.swift
index a6d18b2cf..ea52c28f3 100644
--- a/Sources/RegexBuilder/CharacterClass.swift
+++ b/Sources/RegexBuilder/CharacterClass.swift
@@ -20,11 +20,8 @@ public struct CharacterClass {
     self.ccc = ccc
   }
   
-  init(unconverted model: _CharacterClassModel) {
-    guard let ccc = model.makeDSLTreeCharacterClass() else {
-      fatalError("Unsupported character class")
-    }
-    self.ccc = ccc
+  init(unconverted atom: DSLTree._AST.Atom) {
+    self.ccc = .init(members: [.atom(.unconverted(atom))])
   }
 }
 
@@ -48,16 +45,20 @@ extension RegexComponent where Self == CharacterClass {
     .init(DSLTree.CustomCharacterClass(members: [.atom(.any)]))
   }
 
+  public static var anyNonNewline: CharacterClass {
+    .init(DSLTree.CustomCharacterClass(members: [.atom(.anyNonNewline)]))
+  }
+
   public static var anyGraphemeCluster: CharacterClass {
-    .init(unconverted: .anyGrapheme)
+    .init(unconverted: ._anyGrapheme)
   }
   
   public static var whitespace: CharacterClass {
-    .init(unconverted: .whitespace)
+    .init(unconverted: ._whitespace)
   }
   
   public static var digit: CharacterClass {
-    .init(unconverted: .digit)
+    .init(unconverted: ._digit)
   }
   
   public static var hexDigit: CharacterClass {
@@ -69,19 +70,19 @@ extension RegexComponent where Self == CharacterClass {
   }
 
   public static var horizontalWhitespace: CharacterClass {
-    .init(unconverted: .horizontalWhitespace)
+    .init(unconverted: ._horizontalWhitespace)
   }
 
   public static var newlineSequence: CharacterClass {
-    .init(unconverted: .newlineSequence)
+    .init(unconverted: ._newlineSequence)
   }
 
   public static var verticalWhitespace: CharacterClass {
-    .init(unconverted: .verticalWhitespace)
+    .init(unconverted: ._verticalWhitespace)
   }
 
   public static var word: CharacterClass {
-    .init(unconverted: .word)
+    .init(unconverted: ._word)
   }
 }
 
diff --git a/Sources/TestSupport/TestSupport.swift b/Sources/TestSupport/TestSupport.swift
new file mode 100644
index 000000000..b60adb63f
--- /dev/null
+++ b/Sources/TestSupport/TestSupport.swift
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+import XCTest
+
+// We need to split this out of the test files, as it needs to be compiled
+// *without* `-disable-availability-checking` to ensure the #available check is
+// not compiled into a no-op.
+
+#if os(Linux)
+public func XCTExpectFailure(
+  _ message: String? = nil, body: () throws -> Void
+) rethrows {}
+#endif
+
+/// Returns `true` when the `SwiftStdlib 5.7` availability check passes.
+/// Otherwise wraps an `XCTFail` at `file`/`line` in `XCTExpectFailure`.
+public func ensureNewStdlib(
+  file: StaticString = #file, line: UInt = #line
+) -> Bool {
+  if #available(SwiftStdlib 5.7, *) { return true }
+  // Unsupported stdlib: flag the caller's location as an expected failure.
+  XCTExpectFailure { XCTFail("Unsupported stdlib", file: file, line: line) }
+  return false
+}
diff --git a/Sources/_RegexParser/Regex/AST/Atom.swift b/Sources/_RegexParser/Regex/AST/Atom.swift
index f1419ad78..8706327f7 100644
--- a/Sources/_RegexParser/Regex/AST/Atom.swift
+++ b/Sources/_RegexParser/Regex/AST/Atom.swift
@@ -60,13 +60,13 @@ extension AST {
       case namedCharacter(String)
 
       /// .
-      case any
+      case dot
 
       /// ^
-      case startOfLine
+      case caretAnchor
 
       /// $
-      case endOfLine
+      case dollarAnchor
 
       // References
       case backreference(Reference)
@@ -104,9 +104,9 @@ extension AST.Atom {
     case .callout(let v):               return v
     case .backtrackingDirective(let v): return v
     case .changeMatchingOptions(let v): return v
-    case .any:                          return nil
-    case .startOfLine:                  return nil
-    case .endOfLine:                    return nil
+    case .dot:                          return nil
+    case .caretAnchor:                  return nil
+    case .dollarAnchor:                 return nil
     case .invalid:                      return nil
     }
   }
@@ -511,67 +511,6 @@ extension AST.Atom.CharacterProperty {
   }
 }
 
-extension AST.Atom {
-  /// Anchors and other built-in zero-width assertions.
-  public enum AssertionKind: String, Hashable {
-    /// \A
-    case startOfSubject = #"\A"#
-
-    /// \Z
-    case endOfSubjectBeforeNewline = #"\Z"#
-
-    /// \z
-    case endOfSubject = #"\z"#
-
-    /// \K
-    case resetStartOfMatch = #"\K"#
-
-    /// \G
-    case firstMatchingPositionInSubject = #"\G"#
-
-    /// \y
-    case textSegment = #"\y"#
-
-    /// \Y
-    case notTextSegment = #"\Y"#
-
-    /// ^
-    case startOfLine = #"^"#
-
-    /// $
-    case endOfLine = #"$"#
-
-    /// \b (from outside a custom character class)
-    case wordBoundary = #"\b"#
-
-    /// \B
-    case notWordBoundary = #"\B"#
-
-  }
-
-  public var assertionKind: AssertionKind? {
-    switch kind {
-    case .startOfLine:     return .startOfLine
-    case .endOfLine:       return .endOfLine
-
-    case .escaped(.wordBoundary):    return .wordBoundary
-    case .escaped(.notWordBoundary): return .notWordBoundary
-    case .escaped(.startOfSubject):  return .startOfSubject
-    case .escaped(.endOfSubject):    return .endOfSubject
-    case .escaped(.textSegment):     return .textSegment
-    case .escaped(.notTextSegment):  return .notTextSegment
-    case .escaped(.endOfSubjectBeforeNewline):
-      return .endOfSubjectBeforeNewline
-    case .escaped(.firstMatchingPositionInSubject):
-      return .firstMatchingPositionInSubject
-
-    case .escaped(.resetStartOfMatch): return .resetStartOfMatch
-
-    default: return nil
-    }
-  }
-}
-
 extension AST.Atom {
   public enum Callout: Hashable {
     /// A PCRE callout written `(?C...)`
@@ -806,9 +745,9 @@ extension AST.Atom {
       // the AST? Or defer for the matching engine?
       return nil
 
-    case .scalarSequence, .property, .any, .startOfLine, .endOfLine,
-        .backreference, .subpattern, .callout, .backtrackingDirective,
-        .changeMatchingOptions, .invalid:
+    case .scalarSequence, .property, .dot, .caretAnchor,
+        .dollarAnchor, .backreference, .subpattern, .callout,
+        .backtrackingDirective, .changeMatchingOptions, .invalid:
       return nil
     }
   }
@@ -816,8 +755,10 @@ extension AST.Atom {
   /// Whether this atom is valid as the operand of a custom character class
   /// range.
   public var isValidCharacterClassRangeBound: Bool {
-    // If we have a literal character value for this, it can be used as a bound.
-    if literalCharacterValue != nil { return true }
+    if let c = literalCharacterValue {
+      // We only match character range bounds that are single scalar NFC.
+      return c.hasExactlyOneScalar && c.isNFC
+    }
     switch kind {
     // \cx, \C-x, \M-x, \M-\C-x, \N{...}
     case .keyboardControl, .keyboardMeta, .keyboardMetaControl, .namedCharacter:
@@ -858,7 +799,7 @@ extension AST.Atom {
     case .keyboardMetaControl(let x):
       return "\\M-\\C-\(x)"
 
-    case .property, .escaped, .any, .startOfLine, .endOfLine,
+    case .property, .escaped, .dot, .caretAnchor, .dollarAnchor,
         .backreference, .subpattern, .namedCharacter, .callout,
         .backtrackingDirective, .changeMatchingOptions, .invalid:
       return nil
@@ -874,7 +815,7 @@ extension AST.Atom {
     // TODO: Are callouts quantifiable?
     case .escaped(let esc):
       return esc.isQuantifiable
-    case .startOfLine, .endOfLine:
+    case .caretAnchor, .dollarAnchor:
       return false
     default:
       return true
diff --git a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
index 2168dbb03..a830a18b7 100644
--- a/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
+++ b/Sources/_RegexParser/Regex/Parse/LexicalAnalysis.swift
@@ -480,35 +480,37 @@ extension Parser {
   ///
   mutating func lexQuantifier(
   ) -> (Located<Quant.Amount>, Located<Quant.Kind>, [AST.Trivia])? {
-    var trivia: [AST.Trivia] = []
+    tryEating { p in
+      var trivia: [AST.Trivia] = []
 
-    if let t = lexNonSemanticWhitespace() { trivia.append(t) }
+      if let t = p.lexNonSemanticWhitespace() { trivia.append(t) }
 
-    let amt: Located<Quant.Amount>? = recordLoc { p in
-      if p.tryEat("*") { return .zeroOrMore }
-      if p.tryEat("+") { return .oneOrMore }
-      if p.tryEat("?") { return .zeroOrOne }
+      let amt: Located<Quant.Amount>? = p.recordLoc { p in
+        if p.tryEat("*") { return .zeroOrMore }
+        if p.tryEat("+") { return .oneOrMore }
+        if p.tryEat("?") { return .zeroOrOne }
 
-      return p.tryEating { p in
-        guard p.tryEat("{"),
-              let range = p.lexRange(trivia: &trivia),
-              p.tryEat("}")
-        else { return nil }
-        return range.value
+        return p.tryEating { p in
+          guard p.tryEat("{"),
+                let range = p.lexRange(trivia: &trivia),
+                p.tryEat("}")
+          else { return nil }
+          return range.value
+        }
       }
-    }
-    guard let amt = amt else { return nil }
+      guard let amt = amt else { return nil }
 
-    // PCRE allows non-semantic whitespace here in extended syntax mode.
-    if let t = lexNonSemanticWhitespace() { trivia.append(t) }
+      // PCRE allows non-semantic whitespace here in extended syntax mode.
+      if let t = p.lexNonSemanticWhitespace() { trivia.append(t) }
 
-    let kind: Located<Quant.Kind> = recordLoc { p in
-      if p.tryEat("?") { return .reluctant  }
-      if p.tryEat("+") { return .possessive }
-      return .eager
-    }
+      let kind: Located<Quant.Kind> = p.recordLoc { p in
+        if p.tryEat("?") { return .reluctant  }
+        if p.tryEat("+") { return .possessive }
+        return .eager
+      }
 
-    return (amt, kind, trivia)
+      return (amt, kind, trivia)
+    }
   }
 
   /// Try to consume a range, returning `nil` if unsuccessful.
@@ -2073,9 +2075,9 @@ extension Parser {
         p.unreachable("Should have lexed a group or group-like atom")
 
       // (sometimes) special metacharacters
-      case ".": return customCC ? .char(".") : .any
-      case "^": return customCC ? .char("^") : .startOfLine
-      case "$": return customCC ? .char("$") : .endOfLine
+      case ".": return customCC ? .char(".") : .dot
+      case "^": return customCC ? .char("^") : .caretAnchor
+      case "$": return customCC ? .char("$") : .dollarAnchor
 
       // Escaped
       case "\\": return p.expectEscaped().value
diff --git a/Sources/_RegexParser/Regex/Parse/Sema.swift b/Sources/_RegexParser/Regex/Parse/Sema.swift
index 0aeee282d..ea541fba7 100644
--- a/Sources/_RegexParser/Regex/Parse/Sema.swift
+++ b/Sources/_RegexParser/Regex/Parse/Sema.swift
@@ -221,7 +221,7 @@ extension RegexValidator {
   ) {
     switch esc {
     case .resetStartOfMatch, .singleDataUnit, .trueAnychar,
-        // '\N' needs to be emitted using 'emitAny'.
+        // '\N' needs to be emitted using 'emitDot'.
         .notNewline:
       error(.unsupported("'\\\(esc.character)'"), at: loc)
 
@@ -288,7 +288,7 @@ extension RegexValidator {
               at: atom.location)
       }
 
-    case .char, .scalar, .startOfLine, .endOfLine, .any:
+    case .char, .scalar, .caretAnchor, .dollarAnchor, .dot:
       break
 
     case .invalid:
diff --git a/Sources/_RegexParser/Regex/Printing/DumpAST.swift b/Sources/_RegexParser/Regex/Printing/DumpAST.swift
index 48a2512cf..cf5a56721 100644
--- a/Sources/_RegexParser/Regex/Printing/DumpAST.swift
+++ b/Sources/_RegexParser/Regex/Printing/DumpAST.swift
@@ -153,9 +153,9 @@ extension AST.Atom {
     case .keyboardControl, .keyboardMeta, .keyboardMetaControl:
       fatalError("TODO")
 
-    case .any:         return "."
-    case .startOfLine: return "^"
-    case .endOfLine:   return "$"
+    case .dot:          return "."
+    case .caretAnchor:  return "^"
+    case .dollarAnchor: return "$"
 
     case .backreference(let r), .subpattern(let r):
       return "\(r._dumpBase)"
diff --git a/Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift b/Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
index 0e7cfb1d3..6b8c8ab93 100644
--- a/Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
+++ b/Sources/_RegexParser/Regex/Printing/PrintAsCanonical.swift
@@ -237,9 +237,6 @@ extension AST.Atom.Number {
 
 extension AST.Atom {
   var _canonicalBase: String {
-    if let anchor = self.assertionKind {
-      return anchor.rawValue
-    }
     if let lit = self.literalStringValue {
       // FIXME: We may have to re-introduce escapes
       // For example, `\.` will come back as "." instead
@@ -248,6 +245,10 @@ extension AST.Atom {
       return lit
     }
     switch self.kind {
+    case .caretAnchor:
+      return "^"
+    case .dollarAnchor:
+      return "$"
     case .escaped(let e):
       return "\\\(e.character)"
     case .backreference(let br):
diff --git a/Sources/_RegexParser/Utility/Misc.swift b/Sources/_RegexParser/Utility/Misc.swift
index d37dfbd4a..70dc7a7d5 100644
--- a/Sources/_RegexParser/Utility/Misc.swift
+++ b/Sources/_RegexParser/Utility/Misc.swift
@@ -19,6 +19,21 @@ extension Substring {
   var string: String { String(self) }
 }
 
+extension Character {
+  /// `true` when this character consists of a single Unicode scalar.
+  public var hasExactlyOneScalar: Bool {
+    // A character always has a first scalar, so only look for a second one.
+    unicodeScalars.dropFirst().isEmpty
+  }
+
+  /// `true` when this character's UTF-8 bytes equal its NFC code units.
+  internal var isNFC: Bool {
+    guard !isASCII else { return true }
+    let text = String(self)
+    return text._nfcCodeUnits.elementsEqual(text.utf8)
+  }
+}
+
 extension CustomStringConvertible {
   @_alwaysEmitIntoClient
   public var halfWidthCornerQuoted: String {
diff --git a/Sources/_StringProcessing/ByteCodeGen.swift b/Sources/_StringProcessing/ByteCodeGen.swift
index d18d50aa0..e8c92f2b5 100644
--- a/Sources/_StringProcessing/ByteCodeGen.swift
+++ b/Sources/_StringProcessing/ByteCodeGen.swift
@@ -58,14 +58,24 @@ fileprivate extension Compiler.ByteCodeGen {
     case .any:
       emitAny()
 
+    case .anyNonNewline:
+      emitAnyNonNewline()
+
+    case .dot:
+      emitDot()
+
     case let .char(c):
-      try emitCharacter(c)
+      emitCharacter(c)
 
     case let .scalar(s):
-      try emitScalar(s)
+      if options.semanticLevel == .graphemeCluster {
+        emitCharacter(Character(s))
+      } else {
+        emitMatchScalar(s)
+      }
 
     case let .assertion(kind):
-      try emitAssertion(kind.ast)
+      try emitAssertion(kind)
 
     case let .backreference(ref):
       try emitBackreference(ref.ast)
@@ -88,6 +98,34 @@ fileprivate extension Compiler.ByteCodeGen {
     }
   }
 
+  /// Emits matching instructions for the quoted literal `s` (none if empty).
+  mutating func emitQuotedLiteral(_ s: String) {
+    guard options.semanticLevel == .graphemeCluster else {
+      // Scalar semantics: match every scalar of the literal individually.
+      for scalar in s.unicodeScalars { emitMatchScalar(scalar) }
+      return
+    }
+
+    // Fast path for eliding boundary checks for an all ascii quoted literal.
+    // Binding `lastIdx` optionally (rather than force-unwrapping it) keeps an
+    // empty literal, which has no last scalar, from trapping here.
+    if optimizationsEnabled && s.allSatisfy(\.isASCII),
+       let lastIdx = s.unicodeScalars.indices.last {
+      for idx in s.unicodeScalars.indices {
+        let boundaryCheck = idx == lastIdx
+        let scalar = s.unicodeScalars[idx]
+        if options.isCaseInsensitive && scalar.properties.isCased {
+          builder.buildMatchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck)
+        } else {
+          builder.buildMatchScalar(scalar, boundaryCheck: boundaryCheck)
+        }
+      }
+      return
+    }
+
+    for c in s { emitCharacter(c) }
+  }
+
   mutating func emitBackreference(
     _ ref: AST.Reference
   ) throws {
@@ -110,8 +148,34 @@ fileprivate extension Compiler.ByteCodeGen {
     }
   }
 
+  /// Emits an assertion: at the subject's lower bound, or just after a newline.
+  mutating func emitStartOfLine() {
+    let level = options.semanticLevel
+    builder.buildAssert { (_, _, input, pos, subjectBounds) in
+      guard pos != subjectBounds.lowerBound else { return true }
+      switch level {
+      case .graphemeCluster: return input[input.index(before: pos)].isNewline
+      case .unicodeScalar:
+        return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
+      }
+    }
+  }
+
+  /// Emits an assertion: at the subject's upper bound, or positioned on a newline.
+  mutating func emitEndOfLine() {
+    let level = options.semanticLevel
+    builder.buildAssert { (_, _, input, pos, subjectBounds) in
+      guard pos != subjectBounds.upperBound else { return true }
+      switch level {
+      case .graphemeCluster: return input[pos].isNewline
+      case .unicodeScalar:
+        return input.unicodeScalars[pos].isNewline
+      }
+    }
+  }
+
   mutating func emitAssertion(
-    _ kind: AST.Atom.AssertionKind
+    _ kind: DSLTree.Atom.Assertion
   ) throws {
     // FIXME: Depends on API model we have... We may want to
     // think through some of these with API interactions in mind
@@ -168,43 +232,23 @@ fileprivate extension Compiler.ByteCodeGen {
       }
 
     case .startOfLine:
-      // FIXME: Anchor.startOfLine must always use this first branch
-      // The behavior of `^` should depend on `anchorsMatchNewlines`, but
-      // the DSL-based `.startOfLine` anchor should always match the start
-      // of a line. Right now we don't distinguish between those anchors.
+      emitStartOfLine()
+
+    case .endOfLine:
+      emitEndOfLine()
+
+    case .caretAnchor:
       if options.anchorsMatchNewlines {
-        builder.buildAssert { [semanticLevel = options.semanticLevel]
-            (_, _, input, pos, subjectBounds) in
-          if pos == subjectBounds.lowerBound { return true }
-          switch semanticLevel {
-          case .graphemeCluster:
-            return input[input.index(before: pos)].isNewline
-          case .unicodeScalar:
-            return input.unicodeScalars[input.unicodeScalars.index(before: pos)].isNewline
-          }
-        }
+        emitStartOfLine()
       } else {
         builder.buildAssert { (_, _, input, pos, subjectBounds) in
           pos == subjectBounds.lowerBound
         }
       }
-      
-    case .endOfLine:
-      // FIXME: Anchor.endOfLine must always use this first branch
-      // The behavior of `$` should depend on `anchorsMatchNewlines`, but
-      // the DSL-based `.endOfLine` anchor should always match the end
-      // of a line. Right now we don't distinguish between those anchors.
+
+    case .dollarAnchor:
       if options.anchorsMatchNewlines {
-        builder.buildAssert { [semanticLevel = options.semanticLevel]
-            (_, _, input, pos, subjectBounds) in
-          if pos == subjectBounds.upperBound { return true }
-          switch semanticLevel {
-          case .graphemeCluster:
-            return input[pos].isNewline
-          case .unicodeScalar:
-            return input.unicodeScalars[pos].isNewline
-          }
-        }
+        emitEndOfLine()
       } else {
         builder.buildAssert { (_, _, input, pos, subjectBounds) in
           pos == subjectBounds.upperBound
@@ -245,60 +289,70 @@ fileprivate extension Compiler.ByteCodeGen {
     }
   }
   
-  mutating func emitScalar(_ s: UnicodeScalar) throws {
-    // TODO: Native instruction buildMatchScalar(s)
-    if options.isCaseInsensitive {
-      // TODO: e.g. buildCaseInsensitiveMatchScalar(s)
-      builder.buildConsume(by: consumeScalar {
-        $0.properties.lowercaseMapping == s.properties.lowercaseMapping
-      })
+  mutating func emitMatchScalar(_ s: UnicodeScalar) {
+    assert(options.semanticLevel == .unicodeScalar)
+    if options.isCaseInsensitive && s.properties.isCased {
+      builder.buildMatchScalarCaseInsensitive(s, boundaryCheck: false)
     } else {
-      builder.buildConsume(by: consumeScalar {
-        $0 == s
-      })
+      builder.buildMatchScalar(s, boundaryCheck: false)
     }
   }
   
-  mutating func emitCharacter(_ c: Character) throws {
-    // Unicode scalar matches the specific scalars that comprise a character
+  mutating func emitCharacter(_ c: Character) {
+    // Unicode scalar mode matches the specific scalars that comprise a character
     if options.semanticLevel == .unicodeScalar {
       for scalar in c.unicodeScalars {
-        try emitScalar(scalar)
+        emitMatchScalar(scalar)
       }
       return
     }
     
     if options.isCaseInsensitive && c.isCased {
-      // TODO: buildCaseInsensitiveMatch(c) or buildMatch(c, caseInsensitive: true)
-      builder.buildConsume { input, bounds in
-        let inputChar = input[bounds.lowerBound].lowercased()
-        let matchChar = c.lowercased()
-        return inputChar == matchChar
-          ? input.index(after: bounds.lowerBound)
-          : nil
+      if optimizationsEnabled && c.isASCII {
+        // c.isCased ensures that c is not CR-LF,
+        // so we know that c is a single scalar
+        assert(c.unicodeScalars.count == 1)
+        builder.buildMatchScalarCaseInsensitive(
+          c.unicodeScalars.last!,
+          boundaryCheck: true)
+      } else {
+        builder.buildMatch(c, isCaseInsensitive: true)
       }
-    } else {
-      builder.buildMatch(c)
+      return
+    }
+    
+    if optimizationsEnabled && c.isASCII {
+      let lastIdx = c.unicodeScalars.indices.last!
+      for idx in c.unicodeScalars.indices {
+        builder.buildMatchScalar(c.unicodeScalars[idx], boundaryCheck: idx == lastIdx)
+      }
+      return
     }
+      
+    builder.buildMatch(c, isCaseInsensitive: false)
   }
 
   mutating func emitAny() {
-    switch (options.semanticLevel, options.dotMatchesNewline) {
-    case (.graphemeCluster, true):
+    switch options.semanticLevel {
+    case .graphemeCluster:
       builder.buildAdvance(1)
-    case (.graphemeCluster, false):
+    case .unicodeScalar:
+      // TODO: builder.buildAdvanceUnicodeScalar(1)
       builder.buildConsume { input, bounds in
-        input[bounds.lowerBound].isNewline
-        ? nil
-        : input.index(after: bounds.lowerBound)
+        input.unicodeScalars.index(after: bounds.lowerBound)
       }
+    }
+  }
 
-    case (.unicodeScalar, true):
-      // TODO: builder.buildAdvanceUnicodeScalar(1)
+  mutating func emitAnyNonNewline() {
+    switch options.semanticLevel {
+    case .graphemeCluster:
       builder.buildConsume { input, bounds in
-        input.unicodeScalars.index(after: bounds.lowerBound)
+        input[bounds.lowerBound].isNewline
+        ? nil
+        : input.index(after: bounds.lowerBound)
       }
-    case (.unicodeScalar, false):
+    case .unicodeScalar:
       builder.buildConsume { input, bounds in
         input[bounds.lowerBound].isNewline
         ? nil
@@ -307,6 +361,14 @@ fileprivate extension Compiler.ByteCodeGen {
     }
   }
 
+  /// Emits `.` as `emitAny()` or `emitAnyNonNewline()` per `dotMatchesNewline`.
+  mutating func emitDot() {
+    guard options.dotMatchesNewline else {
+      return emitAnyNonNewline()
+    }
+    emitAny()
+  }
+
   mutating func emitAlternation(
     _ children: [DSLTree.Node]
   ) throws {
@@ -543,7 +605,12 @@ fileprivate extension Compiler.ByteCodeGen {
           decrement %minTrips and fallthrough
 
       loop-body:
+        <if can't guarantee forward progress && extraTrips = nil>:
+          mov currentPosition %pos
         evaluate the subexpression
+        <if can't guarantee forward progress && extraTrips = nil>:
+          if %pos is currentPosition:
+            goto exit
         goto min-trip-count control block
 
       exit-policy control block:
@@ -646,7 +713,28 @@ fileprivate extension Compiler.ByteCodeGen {
     //   <subexpression>
     //   branch min-trip-count
     builder.label(loopBody)
+
+    // If the child node may not make forward progress and the quantification
+    // is unbounded, record the starting position so the loop can be exited.
+    let startPosition: PositionRegister?
+    let emitPositionChecking =
+      (!optimizationsEnabled || !child.guaranteesForwardProgress) &&
+      extraTrips == nil
+
+    if emitPositionChecking {
+      startPosition = builder.makePositionRegister()
+      builder.buildMoveCurrentPosition(into: startPosition!)
+    } else {
+      startPosition = nil
+    }
     try emitNode(child)
+    if emitPositionChecking {
+      // in all quantifier cases, no matter what minTrips or extraTrips is,
+      // if we have a successful non-advancing match, branch to exit because it
+      // can match an arbitrary number of times
+      builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!)
+    }
+
     if minTrips <= 1 {
       // fallthrough
     } else {
@@ -687,21 +775,183 @@ fileprivate extension Compiler.ByteCodeGen {
     builder.label(exit)
   }
 
+  /// Coalesce any adjacent scalar members in a custom character class together.
+  /// This is required in order to produce correct grapheme matching behavior.
+  func coalescingCustomCharacterClassMembers(
+    _ members: [DSLTree.CustomCharacterClass.Member]
+  ) -> [DSLTree.CustomCharacterClass.Member] {
+    struct Accumulator {
+      /// A series of range operands. For example, in `[ab-cde-fg]`, this will
+      /// contain the strings `["ab", "cde", "fg"]`. From there, the resulting
+      /// ranges will be created.
+      private var rangeOperands: [String] = [""]
+
+      /// The current range operand.
+      private var current: String {
+        _read { yield rangeOperands[rangeOperands.count - 1] }
+        _modify { yield &rangeOperands[rangeOperands.count - 1] }
+      }
+
+      /// Try to accumulate a character class member, returning `true` if
+      /// successful, `false` otherwise.
+      mutating func tryAccumulate(
+        _ member: DSLTree.CustomCharacterClass.Member
+      ) -> Bool {
+        switch member {
+        case .atom(let a):
+          guard let c = a.literalCharacterValue else { return false }
+          current.append(c)
+          return true
+        case .quotedLiteral(let str):
+          current += str
+          return true
+        case let .range(lhs, rhs):
+          guard let lhs = lhs.literalCharacterValue,
+                let rhs = rhs.literalCharacterValue
+          else { return false }
+          current.append(lhs)
+          rangeOperands.append(String(rhs))
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !current.isEmpty
+        default:
+          return false
+        }
+      }
+
+      func finish() -> [DSLTree.CustomCharacterClass.Member] {
+        if rangeOperands.count == 1 {
+          // If we didn't have any additional range operands, this isn't a
+          // range, we can just form a standard quoted literal.
+          return [.quotedLiteral(current)]
+        }
+        var members = [DSLTree.CustomCharacterClass.Member]()
+
+        // We have other range operands, splice them together. For N operands
+        // we have N - 1 ranges.
+        for (i, lhs) in rangeOperands.dropLast().enumerated() {
+          let rhs = rangeOperands[i + 1]
+
+          // If this is the first operand we only need to drop the last
+          // character for its quoted members, otherwise this is both an LHS
+          // and RHS of a range, and as such needs both sides trimmed.
+          let leading = i == 0 ? lhs.dropLast() : lhs.dropFirst().dropLast()
+          if !leading.isEmpty {
+            members.append(.quotedLiteral(String(leading)))
+          }
+          members.append(.range(.char(lhs.last!), .char(rhs.first!)))
+        }
+        // We've handled everything except the quoted portion of the last
+        // operand, add it now.
+        let trailing = rangeOperands.last!.dropFirst()
+        if !trailing.isEmpty {
+          members.append(.quotedLiteral(String(trailing)))
+        }
+        return members
+      }
+    }
+    return members
+      .map { m -> DSLTree.CustomCharacterClass.Member in
+        // First we need to recursively coalesce any child character classes.
+        switch m {
+        case .custom(let ccc):
+          return .custom(coalescingCustomCharacterClass(ccc))
+        case .intersection(let lhs, let rhs):
+          return .intersection(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .subtraction(let lhs, let rhs):
+          return .subtraction(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .symmetricDifference(let lhs, let rhs):
+          return .symmetricDifference(
+            coalescingCustomCharacterClass(lhs),
+            coalescingCustomCharacterClass(rhs))
+        case .atom, .range, .quotedLiteral, .trivia:
+          return m
+        }
+      }
+      .coalescing(with: Accumulator(), into: { $0.finish() }) { accum, member in
+        accum.tryAccumulate(member)
+      }
+  }
+
+  func coalescingCustomCharacterClass(
+    _ ccc: DSLTree.CustomCharacterClass
+  ) -> DSLTree.CustomCharacterClass {
+    // This only needs to be done in grapheme semantic mode. In scalar semantic
+    // mode, we don't want to coalesce any scalars into a grapheme. This
+    // means that e.g. `[e\u{301}-\u{302}]` remains a range between U+301 and
+    // U+302.
+    guard options.semanticLevel == .graphemeCluster else { return ccc }
+
+    let members = coalescingCustomCharacterClassMembers(ccc.members)
+    return .init(members: members, isInverted: ccc.isInverted)
+  }
+
   mutating func emitCustomCharacterClass(
     _ ccc: DSLTree.CustomCharacterClass
   ) throws {
+    // Before emitting a custom character class in grapheme semantic mode, we
+    // need to coalesce together any adjacent characters and scalars, over which
+    // we can perform grapheme breaking. This includes e.g. range bounds for
+    // `[e\u{301}-\u{302}]`.
+    let ccc = coalescingCustomCharacterClass(ccc)
     if let asciiBitset = ccc.asAsciiBitset(options),
-        options.semanticLevel == .graphemeCluster,
         optimizationsEnabled {
-      // future work: add a bit to .matchBitset to consume either a character
-      // or a scalar so we can have this optimization in scalar mode
-      builder.buildMatchAsciiBitset(asciiBitset)
+      if options.semanticLevel == .unicodeScalar {
+        builder.buildScalarMatchAsciiBitset(asciiBitset)
+      } else {
+        builder.buildMatchAsciiBitset(asciiBitset)
+      }
     } else {
       let consumer = try ccc.generateConsumer(options)
       builder.buildConsume(by: consumer)
     }
   }
 
+  mutating func emitConcatenation(_ children: [DSLTree.Node]) throws {
+    // Before emitting a concatenation, we need to flatten out any nested
+    // concatenations, and coalesce any adjacent characters and scalars, forming
+    // quoted literals of their contents, over which we can perform grapheme
+    // breaking.
+    func flatten(_ node: DSLTree.Node) -> [DSLTree.Node] {
+      switch node {
+      case .concatenation(let ch):
+        return ch.flatMap(flatten)
+      case .convertedRegexLiteral(let n, _):
+        return flatten(n)
+      default:
+        return [node]
+      }
+    }
+    let children = children
+      .flatMap(flatten)
+      .coalescing(with: "", into: DSLTree.Node.quotedLiteral) { str, node in
+        switch node {
+        case .atom(let a):
+          guard let c = a.literalCharacterValue else { return false }
+          str.append(c)
+          return true
+        case .quotedLiteral(let q):
+          str += q
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !str.isEmpty
+        default:
+          return false
+        }
+      }
+    for child in children {
+      try emitConcatenationComponent(child)
+    }
+  }
+
   @discardableResult
   mutating func emitNode(_ node: DSLTree.Node) throws -> ValueRegister? {
     switch node {
@@ -710,9 +960,7 @@ fileprivate extension Compiler.ByteCodeGen {
       try emitAlternation(children)
 
     case let .concatenation(children):
-      for child in children {
-        try emitConcatenationComponent(child)
-      }
+      try emitConcatenation(children)
 
     case let .capture(name, refId, child, transform):
       options.beginScope()
@@ -758,9 +1006,9 @@ fileprivate extension Compiler.ByteCodeGen {
       try emitQuantification(amt.ast, kind, child)
 
     case let .customCharacterClass(ccc):
-      if ccc.containsAny {
+      if ccc.containsDot {
         if !ccc.isInverted {
-          emitAny()
+          emitDot()
         } else {
           throw Unsupported("Inverted any")
         }
@@ -772,45 +1020,7 @@ fileprivate extension Compiler.ByteCodeGen {
       try emitAtom(a)
 
     case let .quotedLiteral(s):
-      if options.semanticLevel == .graphemeCluster {
-        if options.isCaseInsensitive {
-          // TODO: buildCaseInsensitiveMatchSequence(c) or alternative
-          builder.buildConsume { input, bounds in
-            var iterator = s.makeIterator()
-            var currentIndex = bounds.lowerBound
-            while let ch = iterator.next() {
-              guard currentIndex < bounds.upperBound,
-                    ch.lowercased() == input[currentIndex].lowercased()
-              else { return nil }
-              input.formIndex(after: &currentIndex)
-            }
-            return currentIndex
-          }
-        } else {
-          builder.buildMatchSequence(s)
-        }
-      } else {
-        builder.buildConsume {
-          [caseInsensitive = options.isCaseInsensitive] input, bounds in
-          // TODO: Case folding
-          var iterator = s.unicodeScalars.makeIterator()
-          var currentIndex = bounds.lowerBound
-          while let scalar = iterator.next() {
-            guard currentIndex < bounds.upperBound else { return nil }
-            if caseInsensitive {
-              if scalar.properties.lowercaseMapping != input.unicodeScalars[currentIndex].properties.lowercaseMapping {
-                return nil
-              }
-            } else {
-              if scalar != input.unicodeScalars[currentIndex] {
-                return nil
-              }
-            }
-            input.unicodeScalars.formIndex(after: &currentIndex)
-          }
-          return currentIndex
-        }
-      }
+      emitQuotedLiteral(s)
 
     case let .convertedRegexLiteral(n, _):
       return try emitNode(n)
@@ -832,3 +1042,42 @@ fileprivate extension Compiler.ByteCodeGen {
     return nil
   }
 }
+
+extension DSLTree.Node {
+  var guaranteesForwardProgress: Bool {
+    switch self {
+    case .orderedChoice(let children):
+      return children.allSatisfy { $0.guaranteesForwardProgress }
+    case .concatenation(let children):
+      return children.contains(where: { $0.guaranteesForwardProgress })
+    case .capture(_, _, let node, _):
+      return node.guaranteesForwardProgress
+    case .nonCapturingGroup(let kind, let child):
+      switch kind.ast {
+      case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind:
+        return false
+      default: return child.guaranteesForwardProgress
+      }
+    case .atom(let atom):
+      switch atom {
+      case .changeMatchingOptions, .assertion: return false
+      default: return true
+      }
+    case .trivia, .empty:
+      return false
+    case .quotedLiteral(let string):
+      return !string.isEmpty
+    case .convertedRegexLiteral(let node, _):
+      return node.guaranteesForwardProgress
+    case .consumer, .matcher:
+      // Allow zero width consumers and matchers
+     return false
+    case .customCharacterClass:
+      return true
+    case .quantification(let amount, _, let child):
+      let (atLeast, _) = amount.ast.bounds
+      return atLeast ?? 0 > 0 && child.guaranteesForwardProgress
+    default: return false
+    }
+  }
+}
diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift
index 530126a32..b8daa8b21 100644
--- a/Sources/_StringProcessing/Compiler.swift
+++ b/Sources/_StringProcessing/Compiler.swift
@@ -42,19 +42,43 @@ class Compiler {
   }
 }
 
+/// Hashable wrapper for `Any.Type`.
+struct AnyHashableType: CustomStringConvertible, Hashable {
+  var ty: Any.Type
+  init(_ ty: Any.Type) {
+    self.ty = ty
+  }
+  var description: String { "\(ty)" }
+
+  static func == (lhs: Self, rhs: Self) -> Bool {
+    lhs.ty == rhs.ty
+  }
+  func hash(into hasher: inout Hasher) {
+    hasher.combine(ObjectIdentifier(ty))
+  }
+}
+
 // An error produced when compiling a regular expression.
-enum RegexCompilationError: Error, CustomStringConvertible {
+enum RegexCompilationError: Error, Hashable, CustomStringConvertible {
   // TODO: Source location?
   case uncapturedReference
+  case incorrectOutputType(incorrect: AnyHashableType, correct: AnyHashableType)
+  case invalidCharacterClassRangeOperand(Character)
+
+  static func incorrectOutputType(
+    incorrect: Any.Type, correct: Any.Type
+  ) -> Self {
+    .incorrectOutputType(incorrect: .init(incorrect), correct: .init(correct))
+  }
 
-  case incorrectOutputType(incorrect: Any.Type, correct: Any.Type)
-  
   var description: String {
     switch self {
     case .uncapturedReference:
       return "Found a reference used before it captured any match."
     case .incorrectOutputType(let incorrect, let correct):
       return "Cast to incorrect type 'Regex<\(incorrect)>', expected 'Regex<\(correct)>'"
+    case .invalidCharacterClassRangeOperand(let c):
+      return "'\(c)' is an invalid bound for character class range"
     }
   }
 }
diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift
index dbb324b67..083781120 100644
--- a/Sources/_StringProcessing/ConsumerInterface.swift
+++ b/Sources/_StringProcessing/ConsumerInterface.swift
@@ -11,6 +11,13 @@
 
 @_implementationOnly import _RegexParser
 
+extension Character {
+  var _singleScalarAsciiValue: UInt8? {
+    guard self != "\r\n" else { return nil }
+    return asciiValue
+  }
+}
+
 extension DSLTree.Node {
   /// Attempt to generate a consumer from this AST node
   ///
@@ -53,11 +60,50 @@ extension DSLTree._AST.Atom {
   }
 }
 
+extension Character {
+  func generateConsumer(
+    _ opts: MatchingOptions
+  ) throws -> MEProgram.ConsumeFunction {
+    let isCaseInsensitive = opts.isCaseInsensitive
+    switch opts.semanticLevel {
+    case .graphemeCluster:
+      return { input, bounds in
+        let low = bounds.lowerBound
+        if isCaseInsensitive && isCased {
+          return input[low].lowercased() == lowercased()
+            ? input.index(after: low)
+            : nil
+        } else {
+          return input[low] == self
+            ? input.index(after: low)
+            : nil
+        }
+      }
+    case .unicodeScalar:
+      // TODO: This should only be reachable from character class emission, can
+      // we guarantee that? Otherwise we'd want a different matching behavior.
+      let consumers = unicodeScalars.map { s in consumeScalar {
+        isCaseInsensitive
+          ? $0.properties.lowercaseMapping == s.properties.lowercaseMapping
+          : $0 == s
+      }}
+      return { input, bounds in
+        for fn in consumers {
+          if let idx = fn(input, bounds) {
+            return idx
+          }
+        }
+        return nil
+      }
+    }
+  }
+}
+
 extension DSLTree.Atom {
   var singleScalarASCIIValue: UInt8? {
     switch self {
-    case let .char(c) where c != "\r\n":
-      return c.asciiValue
+    case let .char(c):
+      return c._singleScalarAsciiValue
     case let .scalar(s) where s.isASCII:
       return UInt8(ascii: s)
     case let .unconverted(atom):
@@ -72,44 +118,15 @@ extension DSLTree.Atom {
   func generateConsumer(
     _ opts: MatchingOptions
   ) throws -> MEProgram.ConsumeFunction? {
-    let isCaseInsensitive = opts.isCaseInsensitive
-    
     switch self {
     case let .char(c):
-      if opts.semanticLevel == .graphemeCluster {
-        return { input, bounds in
-          let low = bounds.lowerBound
-          if isCaseInsensitive && c.isCased {
-            return input[low].lowercased() == c.lowercased()
-              ? input.index(after: low)
-              : nil
-          } else {
-            return input[low] == c
-              ? input.index(after: low)
-              : nil
-          }
-        }
-      } else {
-        let consumers = c.unicodeScalars.map { s in consumeScalar {
-          isCaseInsensitive
-            ? $0.properties.lowercaseMapping == s.properties.lowercaseMapping
-            : $0 == s
-        }}
-        return { input, bounds in
-          for fn in consumers {
-            if let idx = fn(input, bounds) {
-              return idx
-            }
-          }
-          return nil
-        }
-      }
+      return try c.generateConsumer(opts)
+
     case let .scalar(s):
-      return consumeScalar {
-        isCaseInsensitive
-          ? $0.properties.lowercaseMapping == s.properties.lowercaseMapping
-          : $0 == s
-      }
+      // A scalar always matches the same as a single scalar character. This
+      // means it must match a whole grapheme in grapheme semantic mode, but
+      // can match a single scalar in scalar semantic mode.
+      return try Character(s).generateConsumer(opts)
 
     case .any:
       // FIXME: Should this be a total ordering?
@@ -123,6 +140,25 @@ extension DSLTree.Atom {
         }
       }
 
+    case .anyNonNewline:
+      switch opts.semanticLevel {
+      case .graphemeCluster:
+        return { input, bounds in
+          input[bounds.lowerBound].isNewline
+            ? nil
+            : input.index(after: bounds.lowerBound)
+        }
+      case .unicodeScalar:
+        return { input, bounds in
+          input[bounds.lowerBound].isNewline
+            ? nil
+            : input.unicodeScalars.index(after: bounds.lowerBound)
+        }
+      }
+
+    case .dot:
+      throw Unreachable(".atom(.dot) should be handled by emitDot")
+
     case .assertion:
       // TODO: We could handle, should this be total?
       return nil
@@ -211,16 +247,20 @@ extension AST.Atom {
   var singleScalar: UnicodeScalar? {
     switch kind {
     case .scalar(let s): return s.value
+    case .escaped(let e):
+      guard let s = e.scalarValue else { return nil }
+      return s
     default: return nil
     }
   }
   
   var singleScalarASCIIValue: UInt8? {
+    if let s = singleScalar, s.isASCII {
+       return UInt8(ascii: s)
+     }
     switch kind {
-    case let .char(c) where c != "\r\n":
-      return c.asciiValue
-    case let .scalar(s) where s.value.isASCII:
-      return UInt8(ascii: s.value)
+    case let .char(c):
+      return c._singleScalarAsciiValue
     default:
       return nil
     }
@@ -264,12 +304,12 @@ extension AST.Atom {
     case let .namedCharacter(name):
       return consumeName(name, opts: opts)
       
-    case .any:
+    case .dot:
       assertionFailure(
         "Should have been handled by tree conversion")
-      fatalError(".atom(.any) is handled in emitAny")
+      fatalError(".atom(.dot) is handled in emitDot")
 
-    case .startOfLine, .endOfLine:
+    case .caretAnchor, .dollarAnchor:
       // handled in emitAssertion
       return nil
 
@@ -287,24 +327,25 @@ extension DSLTree.CustomCharacterClass.Member {
     _ opts: MatchingOptions,
     _ isInverted: Bool
   ) -> DSLTree.CustomCharacterClass.AsciiBitset? {
+    typealias Bitset = DSLTree.CustomCharacterClass.AsciiBitset
     switch self {
     case let .atom(a):
       if let val = a.singleScalarASCIIValue {
-        return DSLTree.CustomCharacterClass.AsciiBitset(
-          val,
-          isInverted,
-          opts.isCaseInsensitive
-        )
+        return Bitset(val, isInverted, opts.isCaseInsensitive)
       }
     case let .range(low, high):
-      if let lowVal = low.singleScalarASCIIValue, let highVal = high.singleScalarASCIIValue {
-        return DSLTree.CustomCharacterClass.AsciiBitset(
-          low: lowVal,
-          high: highVal,
-          isInverted: isInverted,
-          isCaseInsensitive: opts.isCaseInsensitive
-        )
+      if let lowVal = low.singleScalarASCIIValue,
+         let highVal = high.singleScalarASCIIValue {
+        return Bitset(low: lowVal, high: highVal, isInverted: isInverted,
+                      isCaseInsensitive: opts.isCaseInsensitive)
       }
+    case .quotedLiteral(let str):
+      var bitset = Bitset(isInverted: isInverted)
+      for c in str {
+        guard let ascii = c._singleScalarAsciiValue else { return nil }
+        bitset = bitset.union(Bitset(ascii, isInverted, opts.isCaseInsensitive))
+      }
+      return bitset
     default:
       return nil
     }
@@ -321,38 +362,68 @@ extension DSLTree.CustomCharacterClass.Member {
       }
       return c
     case let .range(low, high):
-      // TODO:
-      guard let lhs = low.literalCharacterValue else {
+      guard let lhsChar = low.literalCharacterValue else {
         throw Unsupported("\(low) in range")
       }
-      guard let rhs = high.literalCharacterValue else {
+      guard let rhsChar = high.literalCharacterValue else {
         throw Unsupported("\(high) in range")
       }
 
-      if opts.isCaseInsensitive {
-        let lhsLower = lhs.lowercased()
-        let rhsLower = rhs.lowercased()
-        guard lhsLower <= rhsLower else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
-        return { input, bounds in
-          // TODO: check for out of bounds?
-          let curIdx = bounds.lowerBound
-          if (lhsLower...rhsLower).contains(input[curIdx].lowercased()) {
-            // TODO: semantic level
-            return input.index(after: curIdx)
-          }
-          return nil
+      // We must have NFC single scalar bounds.
+      guard let lhs = lhsChar.singleScalar, lhs.isNFC else {
+        throw RegexCompilationError.invalidCharacterClassRangeOperand(lhsChar)
+      }
+      guard let rhs = rhsChar.singleScalar, rhs.isNFC else {
+        throw RegexCompilationError.invalidCharacterClassRangeOperand(rhsChar)
+      }
+      guard lhs <= rhs else {
+        throw Unsupported("Invalid range \(low)-\(high)")
+      }
+
+      let isCaseInsensitive = opts.isCaseInsensitive
+      let isCharacterSemantic = opts.semanticLevel == .graphemeCluster
+      
+      return { input, bounds in
+        let curIdx = bounds.lowerBound
+        let nextIndex = isCharacterSemantic
+          ? input.index(after: curIdx)
+          : input.unicodeScalars.index(after: curIdx)
+
+        // Under grapheme semantics, we compare based on single NFC scalars. If
+        // such a character is not single scalar under NFC, the match fails. In
+        // scalar semantics, we compare the exact scalar value to the NFC
+        // bounds.
+        let scalar = isCharacterSemantic ? input[curIdx].singleNFCScalar
+                                         : input.unicodeScalars[curIdx]
+        guard let scalar = scalar else { return nil }
+        let scalarRange = lhs ... rhs
+        if scalarRange.contains(scalar) {
+          return nextIndex
         }
-      } else {
-        guard lhs <= rhs else { throw Unsupported("Invalid range \(lhs)-\(rhs)") }
-        return { input, bounds in
-          // TODO: check for out of bounds?
-          let curIdx = bounds.lowerBound
-          if (lhs...rhs).contains(input[curIdx]) {
-            // TODO: semantic level
-            return input.index(after: curIdx)
+
+        // Check for case insensitive matches.
+        func matchesCased(
+          _ cased: (UnicodeScalar.Properties) -> String
+        ) -> Bool {
+          let casedStr = cased(scalar.properties)
+          // In character semantic mode, we need to map to NFC. In scalar
+          // semantics, we should have an exact scalar.
+          let mapped = isCharacterSemantic ? casedStr.singleNFCScalar
+                                           : casedStr.singleScalar
+          guard let mapped = mapped else { return false }
+          return scalarRange.contains(mapped)
+        }
+        if isCaseInsensitive {
+          if scalar.properties.changesWhenLowercased,
+              matchesCased(\.lowercaseMapping) {
+            return nextIndex
+          }
+          if scalar.properties.changesWhenUppercased,
+             matchesCased(\.uppercaseMapping) {
+            return nextIndex
           }
-          return nil
         }
+        return nil
       }
 
     case let .custom(ccc):
@@ -394,21 +465,17 @@ extension DSLTree.CustomCharacterClass.Member {
         }
         return rhs(input, bounds)
       }
-    case .quotedLiteral(let s):
-      if opts.isCaseInsensitive {
-        return { input, bounds in
-          guard s.lowercased()._contains(input[bounds.lowerBound].lowercased()) else {
-            return nil
-          }
-          return input.index(after: bounds.lowerBound)
-        }
-      } else {
-        return { input, bounds in
-          guard s.contains(input[bounds.lowerBound]) else {
-            return nil
+    case .quotedLiteral(let str):
+      let consumers = try str.map {
+        try $0.generateConsumer(opts)
+      }
+      return { input, bounds in
+        for fn in consumers {
+          if let idx = fn(input, bounds) {
+            return idx
           }
-          return input.index(after: bounds.lowerBound)
         }
+        return nil
       }
     case .trivia:
       // TODO: Should probably strip this earlier...
diff --git a/Sources/_StringProcessing/Engine/Backtracking.swift b/Sources/_StringProcessing/Engine/Backtracking.swift
index 8fcdf9312..355702ac1 100644
--- a/Sources/_StringProcessing/Engine/Backtracking.swift
+++ b/Sources/_StringProcessing/Engine/Backtracking.swift
@@ -32,15 +32,18 @@ extension Processor {
     // The int registers store values that can be relevant to
     // backtracking, such as the number of trips in a quantification.
     var intRegisters: [Int]
+    // Same with position registers
+    var posRegisters: [Input.Index]
 
     var destructure: (
       pc: InstructionAddress,
       pos: Position?,
       stackEnd: CallStackAddress,
       captureEnds: [_StoredCapture],
-      intRegisters: [Int]
+      intRegisters: [Int],
+      PositionRegister: [Input.Index]
     ) {
-      (pc, pos, stackEnd, captureEnds, intRegisters)
+      (pc, pos, stackEnd, captureEnds, intRegisters, posRegisters)
     }
   }
 
@@ -53,7 +56,8 @@ extension Processor {
       pos: addressOnly ? nil : currentPosition,
       stackEnd: .init(callStack.count),
       captureEnds: storedCaptures,
-      intRegisters: registers.ints)
+      intRegisters: registers.ints,
+      posRegisters: registers.positions)
   }
 }
 
diff --git a/Sources/_StringProcessing/Engine/InstPayload.swift b/Sources/_StringProcessing/Engine/InstPayload.swift
index c614e10fd..42fb86913 100644
--- a/Sources/_StringProcessing/Engine/InstPayload.swift
+++ b/Sources/_StringProcessing/Engine/InstPayload.swift
@@ -147,6 +147,26 @@ extension Instruction.Payload {
   var string: StringRegister {
     interpret()
   }
+  
+  init(scalar: Unicode.Scalar) {
+    self.init(UInt64(scalar.value))
+  }
+  var scalar: Unicode.Scalar {
+    return Unicode.Scalar(_value: UInt32(self.rawValue))
+  }
+
+  init(scalar: Unicode.Scalar, caseInsensitive: Bool, boundaryCheck: Bool) {
+    let raw = UInt64(scalar.value)
+      + (caseInsensitive ? 1 << 55: 0)
+      + (boundaryCheck ? 1 << 54 : 0)
+    self.init(raw)
+  }
+  var scalarPayload: (Unicode.Scalar, caseInsensitive: Bool, boundaryCheck: Bool) {
+    let caseInsensitive = (self.rawValue >> 55) & 1 == 1
+    let boundaryCheck = (self.rawValue >> 54) & 1 == 1
+    let scalar = Unicode.Scalar(_value: UInt32(self.rawValue & 0xFFFF_FFFF))
+    return (scalar, caseInsensitive: caseInsensitive, boundaryCheck: boundaryCheck)
+  }
 
   init(sequence: SequenceRegister) {
     self.init(sequence)
@@ -190,18 +210,20 @@ extension Instruction.Payload {
     interpret()
   }
 
-  init(element: ElementRegister) {
-    self.init(element)
+  init(element: ElementRegister, isCaseInsensitive: Bool) {
+    self.init(isCaseInsensitive ? 1 : 0, element)
   }
-  var element: ElementRegister {
-    interpret()
+  var elementPayload: (isCaseInsensitive: Bool, ElementRegister) {
+    let pair: (UInt64, ElementRegister) = interpretPair()
+    return (isCaseInsensitive: pair.0 == 1, pair.1)
   }
 
-  init(bitset: AsciiBitsetRegister) {
-    self.init(bitset)
+  init(bitset: AsciiBitsetRegister, isScalar: Bool) {
+    self.init(isScalar ? 1 : 0, bitset)
   }
-  var bitset: AsciiBitsetRegister {
-    interpret()
+  var bitsetPayload: (isScalar: Bool, AsciiBitsetRegister) {
+    let pair: (UInt64, AsciiBitsetRegister) = interpretPair()
+    return (isScalar: pair.0 == 1, pair.1)
   }
   
   init(consumer: ConsumeFunctionRegister) {
@@ -284,10 +306,10 @@ extension Instruction.Payload {
     interpretPair()
   }
 
-  init(pos: PositionRegister, pos2: PositionRegister) {
-    self.init(pos, pos2)
+  init(addr: InstructionAddress, position: PositionRegister) {
+    self.init(addr, position)
   }
-  var pairedPosPos: (PositionRegister, PositionRegister) {
+  var pairedAddrPos: (InstructionAddress, PositionRegister) {
     interpretPair()
   }
 
diff --git a/Sources/_StringProcessing/Engine/Instruction.swift b/Sources/_StringProcessing/Engine/Instruction.swift
index 4e715ad9d..8e1a1f294 100644
--- a/Sources/_StringProcessing/Engine/Instruction.swift
+++ b/Sources/_StringProcessing/Engine/Instruction.swift
@@ -37,6 +37,14 @@ extension Instruction {
     ///
     case moveImmediate
 
+    /// Move the current position into a register
+    ///
+    ///     moveCurrentPosition(into: PositionRegister)
+    ///
+    /// Operands:
+    ///   - Position register to move into
+    case moveCurrentPosition
+
     // MARK: General Purpose: Control flow
 
     /// Branch to a new instruction
@@ -57,6 +65,16 @@ extension Instruction {
     ///
     case condBranchZeroElseDecrement
 
+    /// Conditionally branch if the current position equals the one stored in the register
+    ///
+    ///     condBranch(
+    ///       to: InstAddr, ifSamePositionAs: PositionRegister)
+    ///
+    /// Operands:
+    ///   - Instruction address to branch to, if the position in the register is the same as currentPosition
+    ///   - Position register to check against
+    case condBranchSamePosition
+  
     // TODO: Function calls
 
     // MARK: - Matching
@@ -72,20 +90,27 @@ extension Instruction {
 
     /// Composite assert-advance else restore.
     ///
-    ///     match(_: EltReg)
+    ///     match(_: EltReg, isCaseInsensitive: Bool)
     ///
-    /// Operand: Element register to compare against.
+    /// Operands:
+    ///  - Element register to compare against.
+    ///  - Boolean for if we should match in a case insensitive way
     case match
 
-    /// Match against a sequence of elements
+    /// Match against a scalar and possibly perform a boundary check or match in a case insensitive way
     ///
-    ///     matchSequence(_: SeqReg)
+    ///     matchScalar(_: Unicode.Scalar, isCaseInsensitive: Bool, boundaryCheck: Bool)
     ///
-    /// Operand: Sequence register to compare against.
-    case matchSequence
+    /// Operands: Scalar to match against, case-insensitivity flag, boundary-check flag
+    case matchScalar
 
-    /// Match against a set of valid ascii values stored in a bitset
-    /// Operand: Ascii bitset register containing the bitset
+    /// Match a character or a scalar against a set of valid ascii values stored in a bitset
+    ///
+    ///     matchBitset(_: AsciiBitsetRegister, isScalar: Bool)
+    ///
+    /// Operands:
+    ///  - Ascii bitset register containing the bitset
+    ///  - Boolean for if we should match by scalar value
     case matchBitset
 
     /// TODO: builtin assertions and anchors
@@ -306,7 +331,7 @@ extension Instruction {
   var elementRegister: ElementRegister? {
     switch opcode {
     case .match:
-      return payload.element
+      return payload.elementPayload.1
     default: return nil
     }
   }
diff --git a/Sources/_StringProcessing/Engine/MEBuilder.swift b/Sources/_StringProcessing/Engine/MEBuilder.swift
index 676b21473..0b9a91726 100644
--- a/Sources/_StringProcessing/Engine/MEBuilder.swift
+++ b/Sources/_StringProcessing/Engine/MEBuilder.swift
@@ -32,6 +32,7 @@ extension MEProgram {
     var nextIntRegister = IntRegister(0)
     var nextCaptureRegister = CaptureRegister(0)
     var nextValueRegister = ValueRegister(0)
+    var nextPositionRegister = PositionRegister(0)
 
     // Special addresses or instructions
     var failAddressToken: AddressToken? = nil
@@ -105,6 +106,14 @@ extension MEProgram.Builder {
     fixup(to: t)
   }
 
+  mutating func buildCondBranch(
+    to t: AddressToken,
+    ifSamePositionAs r: PositionRegister
+  ) {
+    instructions.append(.init(.condBranchSamePosition, .init(position: r)))
+    fixup(to: t)
+  }
+
   mutating func buildSave(_ t: AddressToken) {
     instructions.append(.init(.save))
     fixup(to: t)
@@ -135,24 +144,32 @@ extension MEProgram.Builder {
     instructions.append(.init(.advance, .init(distance: n)))
   }
 
-  mutating func buildMatch(_ e: Character) {
+  mutating func buildMatch(_ e: Character, isCaseInsensitive: Bool) {
     instructions.append(.init(
-      .match, .init(element: elements.store(e))))
+      .match, .init(element: elements.store(e), isCaseInsensitive: isCaseInsensitive)))
   }
 
-  mutating func buildMatchSequence<S: Sequence>(
-    _ s: S
-  ) where S.Element == Character {
-    instructions.append(.init(
-      .matchSequence,
-      .init(sequence: sequences.store(.init(s)))))
+  mutating func buildMatchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) {
+    instructions.append(.init(.matchScalar, .init(scalar: s, caseInsensitive: false, boundaryCheck: boundaryCheck)))
+  }
+  
+  mutating func buildMatchScalarCaseInsensitive(_ s: Unicode.Scalar, boundaryCheck: Bool) {
+    instructions.append(.init(.matchScalar, .init(scalar: s, caseInsensitive: true, boundaryCheck: boundaryCheck)))
   }
 
+
   mutating func buildMatchAsciiBitset(
     _ b: DSLTree.CustomCharacterClass.AsciiBitset
   ) {
     instructions.append(.init(
-      .matchBitset, .init(bitset: makeAsciiBitset(b))))
+      .matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: false)))
+  }
+
+  mutating func buildScalarMatchAsciiBitset(
+    _ b: DSLTree.CustomCharacterClass.AsciiBitset
+  ) {
+    instructions.append(.init(
+      .matchBitset, .init(bitset: makeAsciiBitset(b), isScalar: true)))
   }
 
   mutating func buildConsume(
@@ -211,6 +228,10 @@ extension MEProgram.Builder {
       .init(value: value, capture: capture)))
   }
 
+  mutating func buildMoveCurrentPosition(into r: PositionRegister) {
+    instructions.append(.init(.moveCurrentPosition, .init(position: r)))
+  }
+
   mutating func buildBackreference(
     _ cap: CaptureRegister
   ) {
@@ -257,7 +278,8 @@ extension MEProgram.Builder {
       switch inst.opcode {
       case .condBranchZeroElseDecrement:
         payload = .init(addr: addr, int: inst.payload.int)
-
+      case .condBranchSamePosition:
+        payload = .init(addr: addr, position: inst.payload.position)
       case .branch, .save, .saveAddress, .clearThrough:
         payload = .init(addr: addr)
 
@@ -281,6 +303,7 @@ extension MEProgram.Builder {
     regInfo.sequences = sequences.count
     regInfo.ints = nextIntRegister.rawValue
     regInfo.values = nextValueRegister.rawValue
+    regInfo.positions = nextPositionRegister.rawValue
     regInfo.bitsets = asciiBitsets.count
     regInfo.consumeFunctions = consumeFunctions.count
     regInfo.assertionFunctions = assertionFunctions.count
@@ -421,6 +444,12 @@ extension MEProgram.Builder {
     return r
   }
 
+  mutating func makePositionRegister() -> PositionRegister {
+    let r = nextPositionRegister
+    defer { nextPositionRegister.rawValue += 1 }
+    return r
+  }
+
   // TODO: A register-mapping helper struct, which could release
   // registers without monotonicity required
 
diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift
index f7b3a65a2..2be918294 100644
--- a/Sources/_StringProcessing/Engine/Processor.swift
+++ b/Sources/_StringProcessing/Engine/Processor.swift
@@ -219,6 +219,15 @@ extension Processor {
     return true
   }
 
+  mutating func matchCaseInsensitive(_ e: Element) -> Bool {
+    if let current = load(), current.lowercased() == e.lowercased() {
+      _uncheckedForcedConsumeOne()
+      return true
+    }
+    signalFailure()
+    return false
+  }
+
   // Match against the current input prefix. Returns whether
   // it succeeded vs signaling an error.
   mutating func matchSeq<C: Collection>(
@@ -230,6 +239,44 @@ extension Processor {
     return true
   }
   
+  func loadScalar() -> Unicode.Scalar? {
+    currentPosition < end ? input.unicodeScalars[currentPosition] : nil
+  }
+  
+  // Match the single Unicode scalar `s` at the current position. When
+  // `boundaryCheck` is set, the next position must be a grapheme boundary.
+  mutating func matchScalar(_ s: Unicode.Scalar, boundaryCheck: Bool) -> Bool {
+    guard loadScalar() == s,
+          let next = input.unicodeScalars.index(
+            currentPosition, offsetBy: 1, limitedBy: end),
+          !boundaryCheck || input.isOnGraphemeClusterBoundary(next)
+    else {
+      signalFailure()
+      return false
+    }
+    currentPosition = next
+    return true
+  }
+
+  // Case-insensitive variant of matchScalar: the input scalar matches `s` when
+  // their lowercase mappings are equal. When `boundaryCheck` is set, the next
+  // position must also be a grapheme cluster boundary.
+  mutating func matchScalarCaseInsensitive(
+    _ s: Unicode.Scalar,
+    boundaryCheck: Bool
+  ) -> Bool {
+    guard let current = loadScalar(),
+          current.properties.lowercaseMapping == s.properties.lowercaseMapping,
+          let next = input.unicodeScalars.index(
+            currentPosition, offsetBy: 1, limitedBy: end),
+          !boundaryCheck || input.isOnGraphemeClusterBoundary(next)
+    else {
+      signalFailure()
+      return false
+    }
+    currentPosition = next
+    return true
+  }
+
   // If we have a bitset we know that the CharacterClass only matches against
   // ascii characters, so check if the current input element is ascii then
   // check if it is set in the bitset
@@ -244,8 +291,22 @@ extension Processor {
     return true
   }
 
+  // Counterpart of matchBitset, emitted when in Unicode scalar semantic mode.
+  mutating func matchBitsetScalar(
+    _ bitset: DSLTree.CustomCharacterClass.AsciiBitset
+  ) -> Bool {
+    guard let current = loadScalar(), bitset.matches(scalar: current),
+          let next = input.unicodeScalars.index(
+            currentPosition, offsetBy: 1, limitedBy: end) else {
+      signalFailure()
+      return false
+    }
+    currentPosition = next
+    return true
+  }
+
   mutating func signalFailure() {
-    guard let (pc, pos, stackEnd, capEnds, intRegisters) =
+    guard let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) =
             savePoints.popLast()?.destructure
     else {
       state = .fail
@@ -259,6 +320,7 @@ extension Processor {
     callStack.removeLast(callStack.count - stackEnd.rawValue)
     storedCaptures = capEnds
     registers.ints = intRegisters
+    registers.positions = posRegisters
   }
 
   mutating func abort(_ e: Error? = nil) {
@@ -315,7 +377,10 @@ extension Processor {
 
       registers[reg] = int
       controller.step()
-
+    case .moveCurrentPosition:
+      let reg = payload.position
+      registers[reg] = currentPosition
+      controller.step()
     case .branch:
       controller.pc = payload.addr
 
@@ -327,7 +392,13 @@ extension Processor {
         registers[int] -= 1
         controller.step()
       }
-
+    case .condBranchSamePosition:
+      let (addr, pos) = payload.pairedAddrPos
+      if registers[pos] == currentPosition {
+        controller.pc = addr
+      } else {
+        controller.step()
+      }
     case .save:
       let resumeAddr = payload.addr
       let sp = makeSavePoint(resumeAddr)
@@ -369,23 +440,40 @@ extension Processor {
       }
 
     case .match:
-      let reg = payload.element
-      if match(registers[reg]) {
-        controller.step()
+      let (isCaseInsensitive, reg) = payload.elementPayload
+      if isCaseInsensitive {
+        if matchCaseInsensitive(registers[reg]) {
+          controller.step()
+        }
+      } else {
+        if match(registers[reg]) {
+          controller.step()
+        }
       }
 
-    case .matchSequence:
-      let reg = payload.sequence
-      let seq = registers[reg]
-      if matchSeq(seq) {
-        controller.step()
+    case .matchScalar:
+      let (scalar, caseInsensitive, boundaryCheck) = payload.scalarPayload
+      if caseInsensitive {
+        if matchScalarCaseInsensitive(scalar, boundaryCheck: boundaryCheck) {
+          controller.step()
+        }
+      } else {
+        if matchScalar(scalar, boundaryCheck: boundaryCheck) {
+          controller.step()
+        }
       }
 
     case .matchBitset:
-      let reg = payload.bitset
+      let (isScalar, reg) = payload.bitsetPayload
       let bitset = registers[reg]
-      if matchBitset(bitset) {
-        controller.step()
+      if isScalar {
+        if matchBitsetScalar(bitset) {
+          controller.step()
+        }
+      } else {
+        if matchBitset(bitset) {
+          controller.step()
+        }
       }
 
     case .consumeBy:
diff --git a/Sources/_StringProcessing/Engine/Registers.swift b/Sources/_StringProcessing/Engine/Registers.swift
index c76413383..e5d33af8b 100644
--- a/Sources/_StringProcessing/Engine/Registers.swift
+++ b/Sources/_StringProcessing/Engine/Registers.swift
@@ -47,6 +47,8 @@ extension Processor {
     var ints: [Int]
 
     var values: [Any]
+
+    var positions: [Input.Index]
   }
 }
 
@@ -66,6 +68,12 @@ extension Processor.Registers {
       values[i.rawValue] = newValue
     }
   }
+  subscript(_ i: PositionRegister) -> Input.Index {
+    get { positions[i.rawValue] }
+    set {
+      positions[i.rawValue] = newValue
+    }
+  }
   subscript(_ i: ElementRegister) -> Input.Element {
     elements[i.rawValue]
   }
@@ -89,6 +97,8 @@ extension Processor.Registers {
 }
 
 extension Processor.Registers {
+  static let sentinelIndex = "".startIndex
+
   init(
     _ program: MEProgram,
     _ sentinel: String.Index
@@ -120,11 +130,15 @@ extension Processor.Registers {
 
     self.values = Array(
       repeating: SentinelValue(), count: info.values)
+    self.positions = Array(
+      repeating: Processor.Registers.sentinelIndex,
+      count: info.positions)
   }
 
   mutating func reset(sentinel: Input.Index) {
     self.ints._setAll(to: 0)
     self.values._setAll(to: SentinelValue())
+    self.positions._setAll(to: Processor.Registers.sentinelIndex)
   }
 }
 
diff --git a/Sources/_StringProcessing/PrintAsPattern.swift b/Sources/_StringProcessing/PrintAsPattern.swift
index 4237eda33..c1753c49d 100644
--- a/Sources/_StringProcessing/PrintAsPattern.swift
+++ b/Sources/_StringProcessing/PrintAsPattern.swift
@@ -70,16 +70,9 @@ extension PrettyPrinter {
     for namedCapture in namedCaptures {
       print("let \(namedCapture) = Reference(Substring.self)")
     }
-    
-    switch node {
-    case .concatenation(_):
-      printAsPattern(convertedFromAST: node)
-    case .convertedRegexLiteral(.concatenation(_), _):
-      printAsPattern(convertedFromAST: node)
-    default:
-      printBlock("Regex") { printer in
-        printer.printAsPattern(convertedFromAST: node)
-      }
+
+    printBlock("Regex") { printer in
+      printer.printAsPattern(convertedFromAST: node, isTopLevel: true)
     }
   }
 
@@ -89,7 +82,7 @@ extension PrettyPrinter {
   // to have a non-backing-off pretty-printer that this
   // can defer to.
   private mutating func printAsPattern(
-    convertedFromAST node: DSLTree.Node
+    convertedFromAST node: DSLTree.Node, isTopLevel: Bool = false
   ) {
     if patternBackoff(DSLTree._Tree(node)) {
       printBackoff(node)
@@ -106,11 +99,7 @@ extension PrettyPrinter {
       }
 
     case let .concatenation(c):
-      printBlock("Regex") { printer in
-        c.forEach {
-          printer.printAsPattern(convertedFromAST: $0)
-        }
-      }
+      printConcatenationAsPattern(c, isTopLevel: isTopLevel)
 
     case let .nonCapturingGroup(kind, child):
       switch kind.ast {
@@ -263,7 +252,7 @@ extension PrettyPrinter {
       // check above, so it should work out. Need a
       // cleaner way to do this. This means the argument
       // label is a lie.
-      printAsPattern(convertedFromAST: n)
+      printAsPattern(convertedFromAST: n, isTopLevel: isTopLevel)
 
     case let .customCharacterClass(ccc):
       printAsPattern(ccc)
@@ -279,6 +268,64 @@ extension PrettyPrinter {
       print("/* TODO: absent function */")
     }
   }
+
+  enum NodeToPrint {
+    case dslNode(DSLTree.Node)
+    case stringLiteral(String)
+  }
+
+  mutating func printAsPattern(_ node: NodeToPrint) {
+    switch node {
+    case .dslNode(let n):
+      printAsPattern(convertedFromAST: n)
+    case .stringLiteral(let str):
+      print(str)
+    }
+  }
+
+  mutating func printConcatenationAsPattern(
+    _ nodes: [DSLTree.Node], isTopLevel: Bool
+  ) {
+    // We need to coalesce any adjacent character and scalar elements into a
+    // string literal, preserving scalar syntax.
+    let nodes = nodes
+      .map { NodeToPrint.dslNode($0.lookingThroughConvertedLiteral) }
+      .coalescing(
+        with: StringLiteralBuilder(), into: { .stringLiteral($0.result) }
+      ) { literal, node in
+        guard case .dslNode(let node) = node else { return false }
+        switch node {
+        case let .atom(.char(c)):
+          literal.append(c)
+          return true
+        case let .atom(.scalar(s)):
+          literal.append(unescaped: s._dslBase)
+          return true
+        case .quotedLiteral(let q):
+          literal.append(q)
+          return true
+        case .trivia:
+          // Trivia can be completely ignored if we've already coalesced
+          // something.
+          return !literal.isEmpty
+        default:
+          return false
+        }
+      }
+    if isTopLevel || nodes.count == 1 {
+      // If we're at the top level, or we coalesced everything into a single
+      // element, we don't need to print a surrounding Regex { ... }.
+      for n in nodes {
+        printAsPattern(n)
+      }
+      return
+    }
+    printBlock("Regex") { printer in
+      for n in nodes {
+        printer.printAsPattern(n)
+      }
+    }
+  }
   
   mutating func printAsPattern(
     _ ccc: DSLTree.CustomCharacterClass,
@@ -315,8 +362,7 @@ extension PrettyPrinter {
       return
     }
     
-    var charMembers = ""
-    
+    var charMembers = StringLiteralBuilder()
 
     // This iterates through all of the character class members collecting all
     // of the members who can be stuffed into a singular '.anyOf(...)' vs.
@@ -340,14 +386,9 @@ extension PrettyPrinter {
         switch a {
         case let .char(c):
           charMembers.append(c)
-          
-          if c == "\\" {
-            charMembers.append(c)
-          }
-          
           return false
         case let .scalar(s):
-          charMembers += "\\u{\(String(s.value, radix: 16, uppercase: true))}"
+          charMembers.append(unescaped: s._dslBase)
           return false
         case .unconverted(_):
           return true
@@ -356,7 +397,7 @@ extension PrettyPrinter {
         }
         
       case let .quotedLiteral(s):
-        charMembers += s
+        charMembers.append(s)
         return false
         
       case .trivia(_):
@@ -370,7 +411,7 @@ extension PrettyPrinter {
     // Also in the same vein, if we have a few atom members but no
     // nonAtomMembers, then we can emit a single .anyOf(...) for them.
     if !charMembers.isEmpty, nonCharMembers.isEmpty {
-      let anyOf = ".anyOf(\(charMembers._quoted))"
+      let anyOf = ".anyOf(\(charMembers))"
       
       indent()
       
@@ -393,7 +434,7 @@ extension PrettyPrinter {
       printer.indent()
       
       if !charMembers.isEmpty {
-        printer.output(".anyOf(\(charMembers._quoted))")
+        printer.output(".anyOf(\(charMembers))")
         
         if nonCharMembers.count > 0 {
           printer.output(",")
@@ -454,9 +495,9 @@ extension PrettyPrinter {
       case let .scalar(s):
         
         if wrap {
-          output("One(.anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\"))")
+          output("One(.anyOf(\(s._dslBase._bareQuoted)))")
         } else {
-          output(".anyOf(\"\\u{\(String(s.value, radix: 16, uppercase: true))}\")")
+          output(".anyOf(\(s._dslBase._bareQuoted))")
         }
         
       case let .unconverted(a):
@@ -617,13 +658,46 @@ extension PrettyPrinter {
 }
 
 extension String {
-  // TODO: Escaping?
+  fileprivate var _escaped: String {
+    _replacing(#"\"#, with: #"\\"#)._replacing(#"""#, with: #"\""#)
+  }
+
   fileprivate var _quoted: String {
-    "\"\(self._replacing(#"\"#, with: #"\\"#)._replacing(#"""#, with: #"\""#))\""
+    _escaped._bareQuoted
+  }
+
+  fileprivate var _bareQuoted: String {
+    #""\#(self)""#
+  }
+}
+
+extension UnicodeScalar {
+  var _dslBase: String { "\\u{\(String(value, radix: 16, uppercase: true))}" }
+}
+
+/// A helper for building string literals, which handles escaping the contents
+/// appended.
+fileprivate struct StringLiteralBuilder {
+  private var contents = ""
+
+  var result: String { contents._bareQuoted }
+  var isEmpty: Bool { contents.isEmpty }
+
+  mutating func append(_ str: String) {
+    contents += str._escaped
+  }
+  mutating func append(_ c: Character) {
+    contents += String(c)._escaped
   }
+  mutating func append(unescaped str: String) {
+    contents += str
+  }
+}
+extension StringLiteralBuilder: CustomStringConvertible {
+  var description: String { result }
 }
 
-extension AST.Atom.AssertionKind {
+extension DSLTree.Atom.Assertion {
   // TODO: Some way to integrate this with conversion...
   var _patternBase: String {
     switch self {
@@ -631,6 +705,12 @@ extension AST.Atom.AssertionKind {
       return "Anchor.startOfLine"
     case .endOfLine:
       return "Anchor.endOfLine"
+    case .caretAnchor:
+      // The DSL doesn't have an equivalent to this, so print as regex.
+      return "/^/"
+    case .dollarAnchor:
+      // The DSL doesn't have an equivalent to this, so print as regex.
+      return "/$/"
     case .wordBoundary:
       return "Anchor.wordBoundary"
     case .notWordBoundary:
@@ -809,7 +889,7 @@ extension AST.Atom {
   ///
   /// TODO: Some way to integrate this with conversion...
   var _patternBase: (String, canBeWrapped: Bool) {
-    if let anchor = self.assertionKind {
+    if let anchor = self.dslAssertionKind {
       return (anchor._patternBase, false)
     }
 
@@ -821,19 +901,15 @@ extension AST.Atom {
   }
   
   var _dslBase: (String, canBeWrapped: Bool) {
-    func scalarLiteral(_ s: UnicodeScalar) -> String {
-      let hex = String(s.value, radix: 16, uppercase: true)
-      return "\\u{\(hex)}"
-    }
     switch kind {
     case let .char(c):
       return (String(c), false)
 
     case let .scalar(s):
-      return (scalarLiteral(s.value), false)
+      return (s.value._dslBase, false)
 
     case let .scalarSequence(seq):
-      return (seq.scalarValues.map(scalarLiteral).joined(), false)
+      return (seq.scalarValues.map(\._dslBase).joined(), false)
 
     case let .property(p):
       return (p._dslBase, true)
@@ -895,10 +971,11 @@ extension AST.Atom {
     case .namedCharacter:
       return (" /* TODO: named character */", false)
 
-    case .any:
-      return (".any", true)
+    case .dot:
+      // The DSL does not have an equivalent to '.', print as a regex.
+      return ("/./", false)
 
-    case .startOfLine, .endOfLine:
+    case .caretAnchor, .dollarAnchor:
       fatalError("unreachable")
 
     case .backreference:
@@ -950,10 +1027,10 @@ extension AST.Atom {
     case .namedCharacter(let n):
       return "\\N{\(n)}"
       
-    case .any:
+    case .dot:
       return "."
       
-    case .startOfLine, .endOfLine:
+    case .caretAnchor, .dollarAnchor:
       fatalError("unreachable")
       
     case .backreference:
@@ -1101,14 +1178,21 @@ extension DSLTree.Atom {
     switch self {
     case .any:
       return (".any", true)
+
+    case .anyNonNewline:
+      return (".anyNonNewline", true)
+
+    case .dot:
+      // The DSL does not have an equivalent to '.', print as a regex.
+      return ("/./", false)
       
     case let .char(c):
       return (String(c)._quoted, false)
       
     case let .scalar(s):
       let hex = String(s.value, radix: 16, uppercase: true)
-      return ("\\u{\(hex)}"._quoted, false)
-      
+      return ("\\u{\(hex)}"._bareQuoted, false)
+
     case let .unconverted(a):
       if a.ast.isUnprintableAtom {
         return ("#/\(a.ast._regexBase)/#", false)
@@ -1117,7 +1201,7 @@ extension DSLTree.Atom {
       }
       
     case .assertion(let a):
-      return (a.ast._patternBase, false)
+      return (a._patternBase, false)
       
     case .backreference(_):
       return ("/* TOOD: backreferences */", false)
@@ -1142,6 +1226,12 @@ extension DSLTree.Atom {
   var _regexBase: String {
     switch self {
     case .any:
+      return "(?s:.)"
+
+    case .anyNonNewline:
+      return "(?-s:.)"
+
+    case .dot:
       return "."
       
     case let .char(c):
@@ -1149,7 +1239,7 @@ extension DSLTree.Atom {
       
     case let .scalar(s):
       let hex = String(s.value, radix: 16, uppercase: true)
-      return "\\u{\(hex)}"._quoted
+      return "\\u{\(hex)}"._bareQuoted
       
     case let .unconverted(a):
       return a.ast._regexBase
diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift
index 320d10897..4eb7bc42c 100644
--- a/Sources/_StringProcessing/Regex/ASTConversion.swift
+++ b/Sources/_StringProcessing/Regex/ASTConversion.swift
@@ -43,61 +43,7 @@ extension AST.Node {
         return .orderedChoice(children)
 
       case let .concatenation(v):
-        // Coalesce adjacent children who can produce a
-        // string literal representation
-        let astChildren = v.children
-        func coalesce(
-          _ idx: Array<AST>.Index
-        ) -> (Array<AST>.Index, String)? {
-          var result = ""
-          var idx = idx
-          while idx < astChildren.endIndex {
-            guard let atom: AST.Atom = astChildren[idx].as() else { break }
-
-            // TODO: For printing, nice to coalesce
-            // scalars literals too. We likely need a different
-            // approach even before we have a better IR.
-            if let char = atom.singleCharacter  {
-              result.append(char)
-            } else if let scalar = atom.singleScalar {
-              result.append(Character(scalar))
-            } else if case .scalarSequence(let seq) = atom.kind {
-              result += seq.scalarValues.map(Character.init)
-            } else {
-              break
-            }
-            
-            astChildren.formIndex(after: &idx)
-          }
-          return result.isEmpty ? nil : (idx, result)
-        }
-
-        // No need to nest single children concatenations
-        if astChildren.count == 1 {
-          return astChildren.first!.dslTreeNode
-        }
-
-        // Check for a single child post-coalescing
-        if let (idx, str) = coalesce(astChildren.startIndex),
-           idx == astChildren.endIndex
-        {
-          return .quotedLiteral(str)
-        }
-
-        // Coalesce adjacent string children
-        var curIdx = astChildren.startIndex
-        var children = Array<DSLTree.Node>()
-        while curIdx < astChildren.endIndex {
-          if let (nextIdx, str) = coalesce(curIdx) {
-            // TODO: Track source info...
-            children.append(.quotedLiteral(str))
-            curIdx = nextIdx
-          } else {
-            children.append(astChildren[curIdx].dslTreeNode)
-            astChildren.formIndex(after: &curIdx)
-          }
-        }
-        return .concatenation(children)
+        return .concatenation(v.children.map(\.dslTreeNode))
 
       case let .group(v):
         let child = v.child.dslTreeNode
@@ -135,10 +81,9 @@ extension AST.Node {
       case let .atom(v):
         switch v.kind {
         case .scalarSequence(let seq):
-          // Scalar sequences are splatted into concatenated scalars, which
-          // becomes a quoted literal. Sequences nested in concatenations have
-          // already been coalesced, this just handles the lone atom case.
-          return .quotedLiteral(String(seq.scalarValues.map(Character.init)))
+          // The DSL doesn't have an equivalent node for scalar sequences. Splat
+          // them into a concatenation of scalars.
+          return .concatenation(seq.scalarValues.map { .atom(.scalar($0)) })
         default:
           return .atom(v.dslTreeAtom)
         }
@@ -208,16 +153,44 @@ extension AST.CustomCharacterClass {
   }
 }
 
+extension AST.Atom.EscapedBuiltin {
+  var dslAssertionKind: DSLTree.Atom.Assertion? {
+    switch self {
+    case .wordBoundary:                   return .wordBoundary
+    case .notWordBoundary:                return .notWordBoundary
+    case .startOfSubject:                 return .startOfSubject
+    case .endOfSubject:                   return .endOfSubject
+    case .textSegment:                    return .textSegment
+    case .notTextSegment:                 return .notTextSegment
+    case .endOfSubjectBeforeNewline:      return .endOfSubjectBeforeNewline
+    case .firstMatchingPositionInSubject: return .firstMatchingPositionInSubject
+    case .resetStartOfMatch:              return .resetStartOfMatch
+    default: return nil
+    }
+  }
+}
+
+extension AST.Atom {
+  var dslAssertionKind: DSLTree.Atom.Assertion? {
+    switch kind {
+    case .caretAnchor:    return .caretAnchor
+    case .dollarAnchor:   return .dollarAnchor
+    case .escaped(let b): return b.dslAssertionKind
+    default: return nil
+    }
+  }
+}
+
 extension AST.Atom {
   var dslTreeAtom: DSLTree.Atom {
-    if let kind = assertionKind {
-      return .assertion(.init(ast: kind))
+    if let kind = dslAssertionKind {
+      return .assertion(kind)
     }
 
     switch self.kind {
     case let .char(c):                    return .char(c)
-    case let .scalar(s):                  return .char(Character(s.value))
-    case .any:                            return .any
+    case let .scalar(s):                  return .scalar(s.value)
+    case .dot:                            return .dot
     case let .backreference(r):           return .backreference(.init(ast: r))
     case let .changeMatchingOptions(seq): return .changeMatchingOptions(.init(ast: seq))
 
diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift
index 740bdcb8d..520f4991a 100644
--- a/Sources/_StringProcessing/Regex/DSLTree.swift
+++ b/Sources/_StringProcessing/Regex/DSLTree.swift
@@ -117,11 +117,11 @@ extension DSLTree {
     var members: [Member]
     var isInverted: Bool
     
-    var containsAny: Bool {
+    var containsDot: Bool {
       members.contains { member in
         switch member {
-        case .atom(.any): return true
-        case .custom(let ccc): return ccc.containsAny
+        case .atom(.dot): return true
+        case .custom(let ccc): return ccc.containsDot
         default:
           return false
         }
@@ -159,95 +159,25 @@ extension DSLTree {
       indirect case subtraction(CustomCharacterClass, CustomCharacterClass)
       indirect case symmetricDifference(CustomCharacterClass, CustomCharacterClass)
     }
-    
-    internal struct AsciiBitset {
-      let isInverted: Bool
-      var a: UInt64 = 0
-      var b: UInt64 = 0
-
-      init(isInverted: Bool) {
-        self.isInverted = isInverted
-      }
-
-      init(_ val: UInt8, _ isInverted: Bool, _ isCaseInsensitive: Bool) {
-        self.isInverted = isInverted
-        add(val, isCaseInsensitive)
-      }
-
-      init(low: UInt8, high: UInt8, isInverted: Bool, isCaseInsensitive: Bool) {
-        self.isInverted = isInverted
-        for val in low...high {
-          add(val, isCaseInsensitive)
-        }
-      }
-
-      internal init(
-        a: UInt64,
-        b: UInt64,
-        isInverted: Bool
-      ) {
-        self.isInverted = isInverted
-        self.a = a
-        self.b = b
-      }
-
-      internal mutating func add(_ val: UInt8, _ isCaseInsensitive: Bool) {
-        setBit(val)
-        if isCaseInsensitive {
-          switch val {
-            case 64...90: setBit(val + 32)
-            case 97...122: setBit(val - 32)
-            default: break
-          }
-        }
-      }
-
-      internal mutating func setBit(_ val: UInt8) {
-        if val < 64 {
-          a = a | 1 << val
-        } else {
-          b = b | 1 << (val - 64)
-        }
-      }
-
-      internal func matches(char: Character) -> Bool {
-        let ret: Bool
-        if let val = char.asciiValue {
-          if val < 64 {
-            ret = (a >> val) & 1 == 1
-          } else {
-            ret =  (b >> (val - 64)) & 1 == 1
-          }
-        } else {
-          ret = false
-        }
-
-        if isInverted {
-          return !ret
-        }
-
-        return ret
-      }
-
-      /// Joins another bitset from a Member of the same CustomCharacterClass
-      internal func union(_ other: AsciiBitset) -> AsciiBitset {
-        precondition(self.isInverted == other.isInverted)
-        return AsciiBitset(
-          a: self.a | other.a,
-          b: self.b | other.b,
-          isInverted: self.isInverted
-        )
-      }
-    }
   }
 
   @_spi(RegexBuilder)
   public enum Atom {
     case char(Character)
     case scalar(Unicode.Scalar)
+
+    /// Any character, including newlines.
     case any
 
-    case assertion(_AST.AssertionKind)
+    /// Any character, excluding newlines. This differs from '.', as it is not
+    /// affected by single line mode.
+    case anyNonNewline
+
+    /// The DSL representation of '.' in a regex literal. This does not match
+    /// newlines unless single line mode is enabled.
+    case dot
+
+    case assertion(Assertion)
     case backreference(_AST.Reference)
     case symbolicReference(ReferenceID)
 
@@ -257,6 +187,52 @@ extension DSLTree {
   }
 }
 
+extension DSLTree.Atom {
+  @_spi(RegexBuilder)
+  public enum Assertion: Hashable {
+    /// \A
+    case startOfSubject
+
+    /// \Z
+    case endOfSubjectBeforeNewline
+
+    /// \z
+    case endOfSubject
+
+    /// \K
+    case resetStartOfMatch
+
+    /// \G
+    case firstMatchingPositionInSubject
+
+    /// \y
+    case textSegment
+
+    /// \Y
+    case notTextSegment
+
+    /// The DSL's Anchor.startOfLine, which matches the start of a line
+    /// even if `anchorsMatchNewlines` is false.
+    case startOfLine
+
+    /// The DSL's Anchor.endOfLine, which matches the end of a line
+    /// even if `anchorsMatchNewlines` is false.
+    case endOfLine
+
+    /// ^
+    case caretAnchor
+
+    /// $
+    case dollarAnchor
+
+    /// \b (from outside a custom character class)
+    case wordBoundary
+
+    /// \B
+    case notWordBoundary
+  }
+}
+
 extension Unicode.GeneralCategory {
   var extendedGeneralCategory: Unicode.ExtendedGeneralCategory? {
     switch self {
@@ -358,6 +334,14 @@ extension DSLTree.Node {
     default: return nil
     }
   }
+
+  /// The wrapped node if this is a converted regex literal, otherwise `self`.
+  var lookingThroughConvertedLiteral: Self {
+    guard case .convertedRegexLiteral(let wrapped, _) = self else {
+      return self
+    }
+    return wrapped
+  }
 }
 
 extension DSLTree.Atom {
@@ -773,40 +757,6 @@ extension DSLTree {
       internal var ast: AST.AbsentFunction
     }
     
-    @_spi(RegexBuilder)
-    public struct AssertionKind {
-      internal var ast: AST.Atom.AssertionKind
-      
-      public static func startOfSubject(_ inverted: Bool = false) -> Self {
-        .init(ast: .startOfSubject)
-      }
-      public static func endOfSubjectBeforeNewline(_ inverted: Bool = false) -> Self {
-        .init(ast: .endOfSubjectBeforeNewline)
-      }
-      public static func endOfSubject(_ inverted: Bool = false) -> Self {
-        .init(ast: .endOfSubject)
-      }
-      public static func firstMatchingPositionInSubject(_ inverted: Bool = false) -> Self {
-        .init(ast: .firstMatchingPositionInSubject)
-      }
-      public static func textSegmentBoundary(_ inverted: Bool = false) -> Self {
-        inverted
-          ? .init(ast: .notTextSegment)
-          : .init(ast: .textSegment)
-      }
-      public static func startOfLine(_ inverted: Bool = false) -> Self {
-        .init(ast: .startOfLine)
-      }
-      public static func endOfLine(_ inverted: Bool = false) -> Self {
-        .init(ast: .endOfLine)
-      }
-      public static func wordBoundary(_ inverted: Bool = false) -> Self {
-        inverted
-          ? .init(ast: .notWordBoundary)
-          : .init(ast: .wordBoundary)
-      }
-    }
-    
     @_spi(RegexBuilder)
     public struct Reference {
       internal var ast: AST.Reference
@@ -820,6 +770,31 @@ extension DSLTree {
     @_spi(RegexBuilder)
     public struct Atom {
       internal var ast: AST.Atom
+
+      // FIXME: Remove the APIs below once the DSL tree has proper DSL atoms
+      // for these builtins and no longer needs to wrap AST atoms.
+
+      public static var _anyGrapheme: Self {
+        Self(ast: AST.Atom(.escaped(.graphemeCluster), .fake))
+      }
+      public static var _whitespace: Self {
+        Self(ast: AST.Atom(.escaped(.whitespace), .fake))
+      }
+      public static var _digit: Self {
+        Self(ast: AST.Atom(.escaped(.decimalDigit), .fake))
+      }
+      public static var _horizontalWhitespace: Self {
+        Self(ast: AST.Atom(.escaped(.horizontalWhitespace), .fake))
+      }
+      public static var _newlineSequence: Self {
+        Self(ast: AST.Atom(.escaped(.newlineSequence), .fake))
+      }
+      public static var _verticalWhitespace: Self {
+        Self(ast: AST.Atom(.escaped(.verticalTab), .fake))
+      }
+      public static var _word: Self {
+        Self(ast: AST.Atom(.escaped(.wordCharacter), .fake))
+      }
     }
   }
 }
@@ -832,7 +807,8 @@ extension DSLTree.Atom {
     switch self {
     case .changeMatchingOptions, .assertion:
       return false
-    case .char, .scalar, .any, .backreference, .symbolicReference, .unconverted:
+    case .char, .scalar, .any, .anyNonNewline, .dot, .backreference,
+        .symbolicReference, .unconverted:
       return true
     }
   }
diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift
index 24d5c422e..88d2dbf5d 100644
--- a/Sources/_StringProcessing/Regex/Options.swift
+++ b/Sources/_StringProcessing/Regex/Options.swift
@@ -12,7 +12,7 @@
 @_implementationOnly import _RegexParser
 
 @available(SwiftStdlib 5.7, *)
-extension RegexComponent {
+extension Regex {
   /// Returns a regular expression that ignores case when matching.
   ///
   /// - Parameter ignoresCase: A Boolean value indicating whether to ignore case.
@@ -65,7 +65,7 @@ extension RegexComponent {
   /// - Parameter wordBoundaryKind: The algorithm to use for determining word boundaries.
   /// - Returns: The modified regular expression.
   public func wordBoundaryKind(_ wordBoundaryKind: RegexWordBoundaryKind) -> Regex<RegexOutput> {
-    wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .unicodeLevel2)
+    wrapInOption(.unicodeWordBoundaries, addingIf: wordBoundaryKind == .default)
   }
   
   /// Returns a regular expression where the start and end of input
@@ -83,8 +83,8 @@ extension RegexComponent {
   ///
   /// This method corresponds to applying the `m` option in regex syntax. For
   /// this behavior in the `RegexBuilder` syntax, see
-  /// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfInput``,
-  /// and ``Anchor.endOfInput``.
+  /// ``Anchor.startOfLine``, ``Anchor.endOfLine``, ``Anchor.startOfSubject``,
+  /// and ``Anchor.endOfSubject``.
   ///
   /// - Parameter matchLineEndings: A Boolean value indicating whether `^` and
   ///   `$` should match the start and end of lines, respectively.
@@ -205,7 +205,7 @@ public struct RegexWordBoundaryKind: Hashable {
   /// that match `/\w\W/` or `/\W\w/`, or between the start or end of the input
   /// and a `\w` character. Word boundaries therefore depend on the option-
   /// defined behavior of `\w`.
-  public static var unicodeLevel1: Self {
+  public static var simple: Self {
     .init(base: .unicodeLevel1)
   }
 
@@ -215,7 +215,7 @@ public struct RegexWordBoundaryKind: Hashable {
   /// Default word boundaries use a Unicode algorithm that handles some cases
   /// better than simple word boundaries, such as words with internal
   /// punctuation, changes in script, and Emoji.
-  public static var unicodeLevel2: Self {
+  public static var `default`: Self {
     .init(base: .unicodeLevel2)
   }
 }
diff --git a/Sources/_StringProcessing/Unicode/CharacterProps.swift b/Sources/_StringProcessing/Unicode/CharacterProps.swift
index 80f6819a6..e0be4e386 100644
--- a/Sources/_StringProcessing/Unicode/CharacterProps.swift
+++ b/Sources/_StringProcessing/Unicode/CharacterProps.swift
@@ -11,10 +11,3 @@
 
 
 // TODO
-
-extension Character {
-  /// Whether this character is made up of exactly one Unicode scalar value.
-  var hasExactlyOneScalar: Bool {
-    unicodeScalars.index(after: unicodeScalars.startIndex) == unicodeScalars.endIndex
-  }
-}
diff --git a/Sources/_StringProcessing/Unicode/NFC.swift b/Sources/_StringProcessing/Unicode/NFC.swift
new file mode 100644
index 000000000..5c2c4aa48
--- /dev/null
+++ b/Sources/_StringProcessing/Unicode/NFC.swift
@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+@_spi(_Unicode)
+import Swift
+
+extension UnicodeScalar {
+  /// Whether this scalar's NFC form is exactly the scalar itself.
+  var isNFC: Bool { self == Character(self).singleNFCScalar }
+}
+
+extension Character {
+  /// The only scalar in this character's NFC form, or `nil` if that form
+  /// has more than one scalar.
+  var singleNFCScalar: UnicodeScalar? {
+    // A shipped StringProcessing always runs against SwiftStdlib >= 5.7.
+    guard #available(SwiftStdlib 5.7, *) else { return nil }
+    var normalized = String(self)._nfc.makeIterator()
+    let head = normalized.next()
+    return normalized.next() == nil ? head : nil
+  }
+
+  /// The only scalar in this character, or `nil` unless it has exactly one
+  /// scalar.
+  var singleScalar: UnicodeScalar? {
+    hasExactlyOneScalar ? unicodeScalars.first : nil
+  }
+}
+
+extension String {
+  /// The NFC scalar of this string's only character, or `nil` unless the
+  /// string is exactly one character whose NFC form is a single scalar.
+  var singleNFCScalar: UnicodeScalar? {
+    guard let only = first, index(after: startIndex) == endIndex
+    else { return nil }
+    return only.singleNFCScalar
+  }
+
+  /// The only scalar in this string, or `nil` unless it has exactly one.
+  var singleScalar: UnicodeScalar? {
+    let scalars = unicodeScalars
+    guard let only = scalars.first,
+          scalars.index(after: scalars.startIndex) == scalars.endIndex
+    else { return nil }
+    return only
+  }
+}
diff --git a/Sources/_StringProcessing/Utility/AsciiBitset.swift b/Sources/_StringProcessing/Utility/AsciiBitset.swift
new file mode 100644
index 000000000..ad3159820
--- /dev/null
+++ b/Sources/_StringProcessing/Utility/AsciiBitset.swift
@@ -0,0 +1,99 @@
+extension DSLTree.CustomCharacterClass {
+  /// A set of ASCII values stored as two 64-bit words: bit `n` of `a` stands
+  /// for the value `n`, and bit `n` of `b` stands for the value `n + 64`.
+  internal struct AsciiBitset {
+    /// Whether `matches(char:)` and `matches(scalar:)` negate their result.
+    let isInverted: Bool
+    var a: UInt64 = 0
+    var b: UInt64 = 0
+
+    /// Creates an empty bitset.
+    init(isInverted: Bool) {
+      self.isInverted = isInverted
+    }
+
+    /// Creates a bitset containing `val`, plus its other-case letter when
+    /// `isCaseInsensitive` is true.
+    init(_ val: UInt8, _ isInverted: Bool, _ isCaseInsensitive: Bool) {
+      self.isInverted = isInverted
+      add(val, isCaseInsensitive)
+    }
+
+    /// Creates a bitset containing every value in `low...high`. The closed
+    /// range traps if `low > high`.
+    init(low: UInt8, high: UInt8, isInverted: Bool, isCaseInsensitive: Bool) {
+      self.isInverted = isInverted
+      for val in low...high {
+        add(val, isCaseInsensitive)
+      }
+    }
+
+    /// Creates a bitset directly from its two backing words.
+    internal init(a: UInt64, b: UInt64, isInverted: Bool) {
+      self.isInverted = isInverted
+      self.a = a
+      self.b = b
+    }
+
+    /// Adds `val` to the set. When `isCaseInsensitive` is true and `val` is an
+    /// ASCII letter, the same letter in the other case is added as well.
+    internal mutating func add(_ val: UInt8, _ isCaseInsensitive: Bool) {
+      setBit(val)
+      guard isCaseInsensitive else { return }
+      switch val {
+      // 'A'...'Z'. Starts at 65: '@' (64) has no case and must not add '`'.
+      case 65...90: setBit(val + 32)
+      case 97...122: setBit(val - 32) // 'a'...'z'
+      default: break
+      }
+    }
+
+    /// Sets the bit for `val`: values below 64 live in `a`, the rest in `b`.
+    internal mutating func setBit(_ val: UInt8) {
+      if val < 64 {
+        a = a | 1 << val
+      } else {
+        b = b | 1 << (val - 64)
+      }
+    }
+
+    /// Whether the bit for `val` is set, ignoring `isInverted`.
+    private func matches(_ val: UInt8) -> Bool {
+      let word = val < 64 ? a : b
+      let bit = val < 64 ? val : val - 64
+      return (word >> bit) & 1 == 1
+    }
+
+    /// Whether `char` is matched by this set, taking `isInverted` into
+    /// account. A character without a single-scalar ASCII value is treated as
+    /// not in the set, so it matches only when the set is inverted.
+    internal func matches(char: Character) -> Bool {
+      let matched: Bool
+      if let val = char._singleScalarAsciiValue {
+        matched = matches(val)
+      } else {
+        matched = false
+      }
+      return isInverted ? !matched : matched
+    }
+
+    /// Whether `scalar` is matched by this set, taking `isInverted` into
+    /// account. A non-ASCII scalar is treated as not in the set, so it
+    /// matches only when the set is inverted.
+    internal func matches(scalar: Unicode.Scalar) -> Bool {
+      let matched = scalar.isASCII && matches(UInt8(ascii: scalar))
+      return isInverted ? !matched : matched
+    }
+
+    /// Joins another bitset from a Member of the same CustomCharacterClass.
+    /// Both bitsets must agree on `isInverted`.
+    internal func union(_ other: AsciiBitset) -> AsciiBitset {
+      precondition(self.isInverted == other.isInverted)
+      return AsciiBitset(
+        a: self.a | other.a,
+        b: self.b | other.b,
+        isInverted: self.isInverted
+      )
+    }
+  }
+}
diff --git a/Sources/_StringProcessing/Utility/Misc.swift b/Sources/_StringProcessing/Utility/Misc.swift
new file mode 100644
index 000000000..8a9cbe325
--- /dev/null
+++ b/Sources/_StringProcessing/Utility/Misc.swift
@@ -0,0 +1,59 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift.org open source project
+//
+// Copyright (c) 2022 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+extension Array {
+  /// Returns a copy of the array in which each run of adjacent elements
+  /// absorbed by `accumulate` is replaced by the elements `finish` builds
+  /// from the accumulator. `accumulate` returns `true` when it has absorbed
+  /// the element into the accumulator, `false` otherwise.
+  func coalescing<T>(
+    with initialAccumulator: T, into finish: (T) -> Self,
+    accumulate: (inout T, Element) -> Bool
+  ) -> Self {
+    var output = Self()
+    var pending = initialAccumulator
+    var hasPending = false
+
+    // Emits the pending run, if there is one, and resets the accumulator.
+    func flushPending() {
+      guard hasPending else { return }
+      output += finish(pending)
+      pending = initialAccumulator
+      hasPending = false
+    }
+
+    for element in self {
+      if accumulate(&pending, element) {
+        // Absorbed; nothing is emitted until the run ends.
+        hasPending = true
+      } else {
+        // Any pending run ends here and must precede this element.
+        flushPending()
+        output.append(element)
+      }
+    }
+    // A run may extend to the end of the array.
+    flushPending()
+    return output
+  }
+
+  /// Single-element variant: each run of adjacent elements absorbed by
+  /// `accumulate` is replaced by the one element `finish` builds from the
+  /// accumulator. `accumulate` returns `true` when it has absorbed the
+  /// element, `false` otherwise.
+  func coalescing<T>(
+    with initialAccumulator: T, into finish: (T) -> Element,
+    accumulate: (inout T, Element) -> Bool
+  ) -> Self {
+    coalescing(
+      with: initialAccumulator, into: { [finish($0)] }, accumulate: accumulate)
+  }
+}
diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift
index 693b04966..31245c0f7 100644
--- a/Sources/_StringProcessing/Utility/RegexFactory.swift
+++ b/Sources/_StringProcessing/Utility/RegexFactory.swift
@@ -40,7 +40,7 @@ public struct _RegexFactory {
   @_spi(RegexBuilder)
   @available(SwiftStdlib 5.7, *)
   public func assertion<Output>(
-    _ kind: DSLTree._AST.AssertionKind
+    _ kind: DSLTree.Atom.Assertion
   ) -> Regex<Output> {
     .init(node: .atom(.assertion(kind)))
   }
diff --git a/Sources/_StringProcessing/_CharacterClassModel.swift b/Sources/_StringProcessing/_CharacterClassModel.swift
index db2088782..9f515f220 100644
--- a/Sources/_StringProcessing/_CharacterClassModel.swift
+++ b/Sources/_StringProcessing/_CharacterClassModel.swift
@@ -15,8 +15,7 @@
 // an AST, but this isn't a natural thing to produce in the context
 // of parsing or to store in an AST
 
-@_spi(RegexBuilder)
-public struct _CharacterClassModel: Hashable {
+struct _CharacterClassModel: Hashable {
   /// The actual character class to match.
   var cc: Representation
   
@@ -28,7 +27,7 @@ public struct _CharacterClassModel: Hashable {
   var isInverted: Bool = false
 
   // TODO: Split out builtin character classes into their own type?
-  public enum Representation: Hashable {
+  enum Representation: Hashable {
     /// Any character
     case any
     /// Any grapheme cluster
@@ -50,74 +49,6 @@ public struct _CharacterClassModel: Hashable {
     case whitespace
     /// Character.isLetter or Character.isDigit or Character == "_"
     case word
-    /// One of the custom character set.
-    case custom([CharacterSetComponent])
-  }
-
-  public enum SetOperator: Hashable {
-    case subtraction
-    case intersection
-    case symmetricDifference
-  }
-
-  /// A binary set operation that forms a character class component.
-  public struct SetOperation: Hashable {
-    var lhs: CharacterSetComponent
-    var op: SetOperator
-    var rhs: CharacterSetComponent
-
-    func matches(_ c: Character, with options: MatchingOptions) -> Bool {
-      switch op {
-      case .intersection:
-        return lhs.matches(c, with: options) && rhs.matches(c, with: options)
-      case .subtraction:
-        return lhs.matches(c, with: options) && !rhs.matches(c, with: options)
-      case .symmetricDifference:
-        return lhs.matches(c, with: options) != rhs.matches(c, with: options)
-      }
-    }
-  }
-
-  public enum CharacterSetComponent: Hashable {
-    case character(Character)
-    case range(ClosedRange<Character>)
-
-    /// A nested character class.
-    case characterClass(_CharacterClassModel)
-
-    /// A binary set operation of character class components.
-    indirect case setOperation(SetOperation)
-
-    public static func setOperation(
-      lhs: CharacterSetComponent, op: SetOperator, rhs: CharacterSetComponent
-    ) -> CharacterSetComponent {
-      .setOperation(.init(lhs: lhs, op: op, rhs: rhs))
-    }
-
-    func matches(_ character: Character, with options: MatchingOptions) -> Bool {
-      switch self {
-      case .character(let c):
-        if options.isCaseInsensitive {
-          return c.lowercased() == character.lowercased()
-        } else {
-          return c == character
-        }
-      case .range(let range):
-        if options.isCaseInsensitive {
-          let newLower = range.lowerBound.lowercased()
-          let newUpper = range.upperBound.lowercased()
-          // FIXME: Is failing this possible? Is this the right behavior if so?
-          guard newLower <= newUpper else { return false }
-          return (newLower...newUpper).contains(character.lowercased())
-        } else {
-          return range.contains(character)
-        }
-      case .characterClass(let custom):
-        let str = String(character)
-        return custom.matches(in: str, at: str.startIndex, with: options) != nil
-      case .setOperation(let op): return op.matches(character, with: options)
-      }
-    }
   }
 
   enum MatchLevel: Hashable {
@@ -153,7 +84,7 @@ public struct _CharacterClassModel: Hashable {
   }
 
   /// Inverts a character class.
-  public var inverted: Self {
+  var inverted: Self {
     return withInversion(true)
   }
   
@@ -188,8 +119,6 @@ public struct _CharacterClassModel: Hashable {
         matched = c.isWhitespace && (c.isASCII || !options.usesASCIISpaces)
       case .word:
         matched = c.isWordCharacter && (c.isASCII || !options.usesASCIIWord)
-      case .custom(let set):
-        matched = set.any { $0.matches(c, with: options) }
       }
       if isInverted {
         matched.toggle()
@@ -222,8 +151,6 @@ public struct _CharacterClassModel: Hashable {
         matched = c.properties.isWhitespace && (c.isASCII || !options.usesASCIISpaces)
       case .word:
         matched = (c.properties.isAlphabetic || c == "_") && (c.isASCII || !options.usesASCIIWord)
-      case .custom(let set):
-        matched = set.any { $0.matches(Character(c), with: options) }
       }
       if isInverted {
         matched.toggle()
@@ -233,80 +160,50 @@ public struct _CharacterClassModel: Hashable {
   }
 }
 
-@available(SwiftStdlib 5.7, *)
-extension _CharacterClassModel: RegexComponent {
-  public typealias RegexOutput = Substring
-
-  public var regex: Regex<RegexOutput> {
-    guard let ast = self.makeAST() else {
-      fatalError("FIXME: extended AST?")
-    }
-    return Regex(ast: ast)
-  }
-}
-
-@_spi(RegexBuilder)
 extension _CharacterClassModel {
-  public static var any: _CharacterClassModel {
+  static var any: _CharacterClassModel {
     .init(cc: .any, matchLevel: .graphemeCluster)
   }
 
-  public static var anyGrapheme: _CharacterClassModel {
+  static var anyGrapheme: _CharacterClassModel {
     .init(cc: .anyGrapheme, matchLevel: .graphemeCluster)
   }
 
-  public static var anyUnicodeScalar: _CharacterClassModel {
+  static var anyUnicodeScalar: _CharacterClassModel {
     .init(cc: .any, matchLevel: .unicodeScalar)
   }
 
-  public static var whitespace: _CharacterClassModel {
+  static var whitespace: _CharacterClassModel {
     .init(cc: .whitespace, matchLevel: .graphemeCluster)
   }
   
-  public static var digit: _CharacterClassModel {
+  static var digit: _CharacterClassModel {
     .init(cc: .digit, matchLevel: .graphemeCluster)
   }
   
-  public static var hexDigit: _CharacterClassModel {
+  static var hexDigit: _CharacterClassModel {
     .init(cc: .hexDigit, matchLevel: .graphemeCluster)
   }
 
-  public static var horizontalWhitespace: _CharacterClassModel {
+  static var horizontalWhitespace: _CharacterClassModel {
     .init(cc: .horizontalWhitespace, matchLevel: .graphemeCluster)
   }
 
-  public static var newlineSequence: _CharacterClassModel {
+  static var newlineSequence: _CharacterClassModel {
     .init(cc: .newlineSequence, matchLevel: .graphemeCluster)
   }
 
-  public static var verticalWhitespace: _CharacterClassModel {
+  static var verticalWhitespace: _CharacterClassModel {
     .init(cc: .verticalWhitespace, matchLevel: .graphemeCluster)
   }
 
-  public static var word: _CharacterClassModel {
+  static var word: _CharacterClassModel {
     .init(cc: .word, matchLevel: .graphemeCluster)
   }
-
-  public static func custom(
-    _ components: [_CharacterClassModel.CharacterSetComponent]
-  ) -> _CharacterClassModel {
-    .init(cc: .custom(components), matchLevel: .graphemeCluster)
-  }
-}
-
-extension _CharacterClassModel.CharacterSetComponent: CustomStringConvertible {
-  public var description: String {
-    switch self {
-    case .range(let range): return "<range \(range)>"
-    case .character(let character): return "<character \(character)>"
-    case .characterClass(let custom): return "\(custom)"
-    case .setOperation(let op): return "<\(op.lhs) \(op.op) \(op.rhs)>"
-    }
-  }
 }
 
 extension _CharacterClassModel.Representation: CustomStringConvertible {
-  public var description: String {
+  var description: String {
     switch self {
     case .any: return "<any>"
     case .anyGrapheme: return "<any grapheme>"
@@ -318,95 +215,16 @@ extension _CharacterClassModel.Representation: CustomStringConvertible {
     case .verticalWhitespace: return "vertical whitespace"
     case .whitespace: return "<whitespace>"
     case .word: return "<word>"
-    case .custom(let set): return "<custom \(set)>"
     }
   }
 }
 
 extension _CharacterClassModel: CustomStringConvertible {
-  public var description: String {
+  var description: String {
     return "\(isInverted ? "not " : "")\(cc)"
   }
 }
 
-extension _CharacterClassModel {
-  public func makeDSLTreeCharacterClass() -> DSLTree.CustomCharacterClass? {
-    // FIXME: Implement in DSLTree instead of wrapping an AST atom
-    switch makeAST() {
-    case .atom(let atom):
-      return .init(members: [.atom(.unconverted(.init(ast: atom)))])
-    default:
-      return nil
-    }
-  }
-  
-  internal func makeAST() -> AST.Node? {
-    let inv = isInverted
-
-    func esc(_ b: AST.Atom.EscapedBuiltin) -> AST.Node {
-      escaped(b)
-    }
-
-    switch cc {
-    case .any: return atom(.any)
-
-    case .digit:
-      return esc(inv ? .notDecimalDigit : .decimalDigit)
-
-    case .horizontalWhitespace:
-      return esc(
-        inv ? .notHorizontalWhitespace : .horizontalWhitespace)
-
-    // FIXME: newline sequence is not same as \n
-    case .newlineSequence:
-      return esc(inv ? .notNewline : .newline)
-
-    case .whitespace:
-      return esc(inv ? .notWhitespace : .whitespace)
-
-    case .verticalWhitespace:
-      return esc(inv ? .notVerticalTab : .verticalTab)
-
-    case .word:
-      return esc(inv ? .notWordCharacter : .wordCharacter)
-
-    case .anyGrapheme:
-      return esc(.graphemeCluster)
-
-    case .hexDigit:
-      let members: [AST.CustomCharacterClass.Member] = [
-        range_m(.char("a"), .char("f")),
-        range_m(.char("A"), .char("F")),
-        range_m(.char("0"), .char("9")),
-      ]
-      let ccc = AST.CustomCharacterClass(
-        .init(faking: inv ? .inverted : .normal),
-        members,
-        .fake)
-
-      return .customCharacterClass(ccc)
-
-    default: return nil
-    }
-  }
-}
-
-extension DSLTree.Node {
-  var characterClass: _CharacterClassModel? {
-    switch self {
-    case let .customCharacterClass(ccc):
-      return ccc.modelCharacterClass
-    case let .atom(a):
-      return a.characterClass
-    case .characterPredicate:
-      // FIXME: Do we make one from this?
-      return nil
-    default:
-      return nil
-    }
-  }
-}
-
 extension _CharacterClassModel {
   func withMatchLevel(
     _ level: _CharacterClassModel.MatchLevel
@@ -417,17 +235,6 @@ extension _CharacterClassModel {
   }
 }
 
-extension DSLTree.Atom {
-    var characterClass: _CharacterClassModel? {
-    switch self {
-    case let .unconverted(a):
-      return a.ast.characterClass
-
-    default: return nil
-    }
-  }
-}
-
 extension AST.Atom {
     var characterClass: _CharacterClassModel? {
     switch kind {
@@ -438,8 +245,8 @@ extension AST.Atom {
       // this? Or does grapheme-semantic mode complicate that?
       return nil
       
-    case .any:
-      // `.any` is handled in the matching engine by Compiler.emitAny() and in
+    case .dot:
+      // `.dot` is handled in the matching engine by Compiler.emitDot() and in
       // the legacy compiler by the `.any` instruction, which can provide lower
       // level instructions than the CharacterClass-generated consumer closure
       //
@@ -468,7 +275,7 @@ extension AST.Atom.EscapedBuiltin {
 
     // FIXME: This is more like '.' than inverted '\R', as it is affected
     // by e.g (*CR). We should therefore really be emitting it through
-    // emitAny(). For now we treat it as semantically invalid.
+    // emitDot(). For now we treat it as semantically invalid.
     case .notNewline: return .newlineSequence.inverted
 
     case .whitespace:    return .whitespace
@@ -489,81 +296,6 @@ extension AST.Atom.EscapedBuiltin {
   }
 }
 
-extension DSLTree.CustomCharacterClass {
-  // TODO: Refactor a bit, and... can we drop this type?
-  var modelCharacterClass: _CharacterClassModel? {
-    var result =
-      Array<_CharacterClassModel.CharacterSetComponent>()
-    for m in members {
-      switch m {
-      case let .atom(a):
-        if let cc = a.characterClass {
-          result.append(.characterClass(cc))
-        } else if let c = a.literalCharacterValue {
-          result.append(.character(c))
-        } else {
-          return nil
-        }
-      case let .range(low, high):
-        guard let lhs = low.literalCharacterValue,
-              let rhs = high.literalCharacterValue
-        else {
-          return nil
-        }
-        result.append(.range(lhs...rhs))
-
-      case let .custom(ccc):
-        guard let cc = ccc.modelCharacterClass else {
-          return nil
-        }
-        result.append(.characterClass(cc))
-
-      case let .intersection(lhs, rhs):
-        guard let lhs = lhs.modelCharacterClass,
-              let rhs = rhs.modelCharacterClass
-        else {
-          return nil
-        }
-        result.append(.setOperation(
-          lhs: .characterClass(lhs),
-          op: .intersection,
-          rhs: .characterClass(rhs)))
-
-      case let .subtraction(lhs, rhs):
-        guard let lhs = lhs.modelCharacterClass,
-              let rhs = rhs.modelCharacterClass
-        else {
-          return nil
-        }
-        result.append(.setOperation(
-          lhs: .characterClass(lhs),
-          op: .subtraction,
-          rhs: .characterClass(rhs)))
-
-      case let .symmetricDifference(lhs, rhs):
-        guard let lhs = lhs.modelCharacterClass,
-              let rhs = rhs.modelCharacterClass
-        else {
-          return nil
-        }
-        result.append(.setOperation(
-          lhs: .characterClass(lhs),
-          op: .symmetricDifference,
-          rhs: .characterClass(rhs)))
-
-      case let .quotedLiteral(s):
-        // Decompose quoted literal into literal characters.
-        result += s.map { .character($0) }
-
-      case .trivia:
-        break
-      }
-    }
-    let cc = _CharacterClassModel.custom(result)
-    return isInverted ? cc.inverted : cc
-  }
-}
-
 extension _CharacterClassModel {
   // FIXME: Calling on inverted sets wont be the same as the
   // inverse of a boundary if at the start or end of the
diff --git a/Tests/RegexBuilderTests/RegexDSLTests.swift b/Tests/RegexBuilderTests/RegexDSLTests.swift
index b67c6c242..e25f2df05 100644
--- a/Tests/RegexBuilderTests/RegexDSLTests.swift
+++ b/Tests/RegexBuilderTests/RegexDSLTests.swift
@@ -12,6 +12,7 @@
 import XCTest
 import _StringProcessing
 import RegexBuilder
+import TestSupport
 
 class RegexDSLTests: XCTestCase {
   func _testDSLCaptures<Content: RegexComponent, MatchType>(
@@ -69,7 +70,13 @@ class RegexDSLTests: XCTestCase {
     XCTAssertTrue(match.output == substringMatch.output)
   }
 
+  let allNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n\u{85}\u{2028}\u{2029}"
+  let asciiNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n"
+
   func testCharacterClasses() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     try _testDSLCaptures(
       ("a c", ("a c", " ", "c")),
       matchType: (Substring, Substring, Substring).self, ==)
@@ -110,9 +117,143 @@ class RegexDSLTests: XCTestCase {
         CharacterClass.whitespace.inverted
       }
     }
+
+    // `.newlineSequence` and `.verticalWhitespace` match the same set of
+    // newlines in grapheme semantic mode, and scalar mode when applied with
+    // OneOrMore.
+    for cc in [CharacterClass.newlineSequence, .verticalWhitespace] {
+      for mode in [RegexSemanticLevel.unicodeScalar, .graphemeCluster] {
+        try _testDSLCaptures(
+          ("\n", ("\n", "\n")),
+          ("\r", ("\r", "\r")),
+          ("\r\n", ("\r\n", "\r\n")),
+          (allNewlines, (allNewlines[...], allNewlines[...])),
+          ("abc\ndef", ("abc\ndef", "\n")),
+          ("abc\n\r\ndef", ("abc\n\r\ndef", "\n\r\n")),
+          ("abc\(allNewlines)def", ("abc\(allNewlines)def", allNewlines[...])),
+          ("abc", nil),
+          matchType: (Substring, Substring).self, ==)
+        {
+          Regex {
+            ZeroOrMore {
+              cc.inverted
+            }
+            Capture {
+              OneOrMore(cc)
+            }
+            ZeroOrMore {
+              cc.inverted
+            }
+          }.matchingSemantics(mode)
+        }
+
+        // Try with ASCII-only whitespace.
+        try _testDSLCaptures(
+          ("\n", ("\n", "\n")),
+          ("\r", ("\r", "\r")),
+          ("\r\n", ("\r\n", "\r\n")),
+          (allNewlines, (allNewlines[...], asciiNewlines[...])),
+          ("abc\ndef", ("abc\ndef", "\n")),
+          ("abc\n\r\ndef", ("abc\n\r\ndef", "\n\r\n")),
+          ("abc\(allNewlines)def", ("abc\(allNewlines)def", asciiNewlines[...])),
+          ("abc", nil),
+          matchType: (Substring, Substring).self, ==)
+        {
+          Regex {
+            ZeroOrMore {
+              cc.inverted
+            }
+            Capture {
+              OneOrMore(cc)
+            }
+            ZeroOrMore {
+              cc.inverted
+            }
+          }.matchingSemantics(mode).asciiOnlyWhitespace()
+        }
+      }
+    }
+
+    // `.newlineSequence` in scalar mode may match a single `\r\n`.
+    // `.verticalWhitespace` may not.
+    for asciiOnly in [true, false] {
+      try _testDSLCaptures(
+        ("\r", "\r"),
+        ("\r\n", "\r\n"),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          CharacterClass.newlineSequence
+        }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly)
+      }
+      try _testDSLCaptures(
+        ("\r", nil),
+        ("\r\n", nil),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          CharacterClass.newlineSequence.inverted
+        }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly)
+      }
+      try _testDSLCaptures(
+        ("\r", "\r"),
+        ("\r\n", nil),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          CharacterClass.verticalWhitespace
+        }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly)
+      }
+      try _testDSLCaptures(
+        ("\r", nil),
+        ("\r\n", nil),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          CharacterClass.verticalWhitespace.inverted
+        }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly)
+      }
+      try _testDSLCaptures(
+        ("\r", nil),
+        ("\r\n", nil),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          CharacterClass.verticalWhitespace.inverted
+          "\n"
+        }.matchingSemantics(.unicodeScalar).asciiOnlyWhitespace(asciiOnly)
+      }
+    }
+
+    // Make sure horizontal whitespace does not match newlines or other
+    // vertical whitespace.
+    try _testDSLCaptures(
+      ("  \u{A0} \u{9}  \t ", "  \u{A0} \u{9}  \t "),
+      (" \n", nil),
+      (" \r", nil),
+      (" \r\n", nil),
+      (" \u{2028}", nil),
+      matchType: Substring.self, ==)
+    {
+      OneOrMore(.horizontalWhitespace)
+    }
+
+    // Horizontal whitespace in ASCII mode.
+    try _testDSLCaptures(
+      ("   \u{9}  \t ", "   \u{9}  \t "),
+      ("\u{A0}", nil),
+      matchType: Substring.self, ==)
+    {
+      Regex {
+        OneOrMore(.horizontalWhitespace)
+      }.asciiOnlyWhitespace()
+    }
   }
 
   func testCharacterClassOperations() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     try _testDSLCaptures(
       ("bcdefn1a", "bcdefn1a"),
       ("nbcdef1a", nil),        // fails symmetric difference lookahead
@@ -133,6 +274,105 @@ class RegexDSLTests: XCTestCase {
     }
   }
 
+  func testAny() throws { // Exercises `.any` and `.anyGraphemeCluster` against newlines and multi-scalar characters.
+    // `.any` matches newlines whether `dotMatchesNewlines` is on or off.
+    for dotMatchesNewline in [true, false] {
+      try _testDSLCaptures(
+        ("abc\(allNewlines)def", "abc\(allNewlines)def"),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          OneOrMore(.any)
+        }.dotMatchesNewlines(dotMatchesNewline)
+      }
+    }
+
+    // `.anyGraphemeCluster` acts like grapheme-mode `.any` under either semantic level.
+    for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
+      try _testDSLCaptures(
+        ("a", "a"),
+        ("\r\n", "\r\n"),
+        ("e\u{301}", "e\u{301}"),
+        ("e\u{301}f", nil), // more than one character in the input: no match expected
+        ("e\u{303}\u{301}\u{302}", "e\u{303}\u{301}\u{302}"),
+        matchType: Substring.self, ==)
+      {
+        Regex {
+          One(.anyGraphemeCluster)
+        }.matchingSemantics(mode)
+      }
+
+      // Like `.any`, it matches newlines regardless of `dotMatchesNewlines`.
+      for dotMatchesNewline in [true, false] {
+        try _testDSLCaptures(
+          ("abc\(allNewlines)def", "abc\(allNewlines)def"),
+          matchType: Substring.self, ==)
+        {
+          Regex {
+            OneOrMore(.anyGraphemeCluster)
+          }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
+        }
+      }
+    }
+  }
+
+  func testAnyNonNewline() throws { // Exercises `.anyNonNewline`, its inverse, and an intersection with it.
+    // `.anyNonNewline` behaves like `.` without single-line mode, whatever options are set.
+    for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
+      for dotMatchesNewline in [true, false] {
+        try _testDSLCaptures(
+          ("abcdef", "abcdef"),
+          ("abcdef\n", nil),
+          ("\r\n", nil),
+          ("\r", nil),
+          ("\n", nil),
+          matchType: Substring.self, ==)
+        {
+          Regex {
+            OneOrMore(.anyNonNewline)
+          }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
+        }
+
+        try _testDSLCaptures( // inverted: only newline input is expected to match
+          ("abcdef", nil),
+          ("abcdef\n", nil),
+          ("\r\n", "\r\n"),
+          ("\r", "\r"),
+          ("\n", "\n"),
+          matchType: Substring.self, ==)
+        {
+          Regex {
+            OneOrMore(.anyNonNewline.inverted)
+          }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
+        }
+
+        try _testDSLCaptures( // intersecting with a set that lists "\n" and "\r" still excludes them
+          ("abc", "abc"),
+          ("abcd", nil),
+          ("\r\n", nil),
+          ("\r", nil),
+          ("\n", nil),
+          matchType: Substring.self, ==)
+        {
+          Regex {
+            OneOrMore(CharacterClass.anyNonNewline.intersection(.anyOf("\n\rabc")))
+          }.matchingSemantics(mode).dotMatchesNewlines(dotMatchesNewline)
+        }
+      }
+    }
+
+    try _testDSLCaptures(
+      ("\r\n", "\r\n"), matchType: Substring.self, ==) { // default semantics: one inverted class matches "\r\n" whole
+        CharacterClass.anyNonNewline.inverted
+      }
+    try _testDSLCaptures(
+      ("\r\n", nil), matchType: Substring.self, ==) { // scalar semantics: no match expected for the two-scalar "\r\n"
+        Regex {
+          CharacterClass.anyNonNewline.inverted
+        }.matchingSemantics(.unicodeScalar)
+      }
+  }
+
   func testMatchResultDotZeroWithoutCapture() throws {
     let match = try XCTUnwrap("aaa".wholeMatch { OneOrMore { "a" } })
     XCTAssertEqual(match.0, "aaa")
@@ -234,8 +474,10 @@ class RegexDSLTests: XCTestCase {
       ("abcabc", "abcabc"),
       ("abcABCaBc", "abcABCaBc"),
       matchType: Substring.self, ==) {
-        OneOrMore {
-          "abc"
+        Regex {
+          OneOrMore {
+            "abc"
+          }
         }.ignoresCase(true)
       }
     
@@ -247,8 +489,10 @@ class RegexDSLTests: XCTestCase {
       ("abcabc", "abcabc"),
       ("abcABCaBc", "abcABCaBc"),
       matchType: Substring.self, ==) {
-        OneOrMore {
-          "abc"
+        Regex {
+          OneOrMore {
+            "abc"
+          }
         }
         .ignoresCase(true)
         .ignoresCase(false)
@@ -264,9 +508,13 @@ class RegexDSLTests: XCTestCase {
       ("abcabc", "abcabc"),
       ("abcdeABCdeaBcde", "abcdeABCdeaBcde"),
       matchType: Substring.self, ==) {
-        OneOrMore {
-          "abc".ignoresCase(true)
-          Optionally("de")
+        Regex {
+          OneOrMore {
+            Regex {
+              "abc"
+            }.ignoresCase(true)
+            Optionally("de")
+          }
         }
         .ignoresCase(false)
       }
@@ -303,11 +551,13 @@ class RegexDSLTests: XCTestCase {
         "stop"
         " "
         
-        Capture {
-          OneOrMore(.word)
-          Anchor.wordBoundary
-        }
-        .wordBoundaryKind(.unicodeLevel1)
+        Regex {
+          Capture {
+            OneOrMore(.word)
+            Anchor.wordBoundary
+          }
+        }.wordBoundaryKind(.simple)
+        
         OneOrMore(.any, .reluctant)
         "stop"
       }
@@ -317,15 +567,17 @@ class RegexDSLTests: XCTestCase {
       matchType: (Substring, Substring, Substring).self, ==) {
         Capture {
           // Reluctant behavior due to option
-          OneOrMore(.anyOf("abcd"))
-            .repetitionBehavior(.reluctant)
+          Regex {
+            OneOrMore(.anyOf("abcd"))
+          }.repetitionBehavior(.reluctant)
         }
         ZeroOrMore("a"..."z")
         
         Capture {
           // Eager behavior due to explicit parameter, despite option
-          OneOrMore(.digit, .eager)
-            .repetitionBehavior(.reluctant)
+          Regex {
+            OneOrMore(.digit, .eager)
+          }.repetitionBehavior(.reluctant)
         }
         ZeroOrMore(.digit)
       }
@@ -334,16 +586,20 @@ class RegexDSLTests: XCTestCase {
       ("abcdefg", ("abcdefg", "abcdefg")),
       ("abcdéfg", ("abcdéfg", "abcd")),
       matchType: (Substring, Substring).self, ==) {
-        Capture {
-          OneOrMore(.word)
-        }
-        .asciiOnlyWordCharacters()
+        Regex {
+          Capture {
+            OneOrMore(.word)
+          }
+        }.asciiOnlyWordCharacters()
         
         ZeroOrMore(.any)
       }
   }
   
   func testQuantificationBehavior() throws {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     // Eager by default
     try _testDSLCaptures(
       ("abc1def2", ("abc1def2", "2")),
@@ -368,8 +624,10 @@ class RegexDSLTests: XCTestCase {
       ("abc1def2", ("abc1def2", "1")),
       matchType: (Substring, Substring).self, ==)
     {
-      OneOrMore(.reluctant) {
-        One(.word)
+      Regex {
+        OneOrMore(.reluctant) {
+          One(.word)
+        }
       }.repetitionBehavior(.possessive)
       Capture(.digit)
       ZeroOrMore(.any)
@@ -421,8 +679,9 @@ class RegexDSLTests: XCTestCase {
     {
       Regex {
         Capture {
-          OneOrMore("a")
-            .repetitionBehavior(.eager)
+          Regex {
+            OneOrMore("a")
+          }.repetitionBehavior(.eager)
         }
         OneOrMore("a")
       }.repetitionBehavior(.possessive)
@@ -434,8 +693,9 @@ class RegexDSLTests: XCTestCase {
     {
       Regex {
         Capture {
-          OneOrMore("a")
-            .repetitionBehavior(.reluctant)
+          Regex {
+            OneOrMore("a")
+          }.repetitionBehavior(.reluctant)
         }
         OneOrMore("a")
       }.repetitionBehavior(.possessive)
@@ -674,19 +934,40 @@ class RegexDSLTests: XCTestCase {
         Anchor.endOfSubject
       }.anchorsMatchLineEndings()
     }
-    
-    // FIXME: Anchor.start/endOfLine needs to always match line endings,
-    // even when the `anchorsMatchLineEndings()` option is turned off.
+
     try _testDSLCaptures(
-      ("\naaa", "aaa"),
-      ("aaa\n", "aaa"),
-      ("\naaa\n", "aaa"),
-      matchType: Substring.self, ==, xfail: true)
+      ("\naaa", "\naaa"),
+      ("aaa\n", "aaa\n"),
+      ("\naaa\n", "\naaa\n"),
+      matchType: Substring.self, ==)
     {
       Regex {
+        Optionally { "\n" }
         Anchor.startOfLine
         Repeat("a", count: 3)
         Anchor.endOfLine
+        Optionally { "\n" }
+      }
+    }
+
+    // startOfLine/endOfLine apply regardless of mode.
+    for matchLineEndings in [true, false] {
+      for mode in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
+        let r = Regex {
+          Anchor.startOfLine
+          Repeat("a", count: 3)
+          Anchor.endOfLine
+        }.anchorsMatchLineEndings(matchLineEndings).matchingSemantics(mode)
+
+        XCTAssertNotNil(try r.firstMatch(in: "\naaa"))
+        XCTAssertNotNil(try r.firstMatch(in: "aaa\n"))
+        XCTAssertNotNil(try r.firstMatch(in: "\naaa\n"))
+        XCTAssertNotNil(try r.firstMatch(in: "\naaa\r\n"))
+        XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\n"))
+        XCTAssertNotNil(try r.firstMatch(in: "\r\naaa\r\n"))
+
+        XCTAssertNil(try r.firstMatch(in: "\nbaaa\n"))
+        XCTAssertNil(try r.firstMatch(in: "\naaab\n"))
       }
     }
   }
@@ -1120,6 +1401,121 @@ class RegexDSLTests: XCTestCase {
     }
   }
 
+  func testScalarMatching() throws {
+    // RegexBuilder provides a RegexComponent conformance for UnicodeScalar. In
+    // grapheme cluster mode, it should only match entire graphemes. It may
+    // match a single scalar of a grapheme cluster in scalar semantic mode.
+    XCTAssertNotNil("a".firstMatch(of: "a" as UnicodeScalar))
+    XCTAssertNil("a\u{301}".firstMatch(of: "a" as UnicodeScalar))
+    XCTAssertNotNil("a\u{301}".firstMatch(
+      of: ("a" as UnicodeScalar).regex.matchingSemantics(.unicodeScalar)))
+
+    let r1 = Regex { // the same checks, with the scalar placed inside a builder block
+      "a" as UnicodeScalar
+    }
+    XCTAssertNil(try r1.firstMatch(in: "a\u{301}"))
+    XCTAssertNotNil(
+      try r1.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}")
+    )
+
+    let r2 = Regex { // scalars used as members of a character class
+      CharacterClass.anyOf(["a" as UnicodeScalar, "👍"])
+    }
+    XCTAssertNil(try r2.firstMatch(in: "a\u{301}"))
+    XCTAssertNotNil(
+      try r2.matchingSemantics(.unicodeScalar).firstMatch(in: "a\u{301}")
+    )
+
+    let r3 = Regex { // a ZWJ emoji sequence spelled out scalar by scalar
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👧" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👦" as UnicodeScalar
+    }
+    XCTAssertNotNil(try r3.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r3.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r3.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
+
+    let r4 = Regex { "é" as UnicodeScalar } // a single-scalar é
+    XCTAssertNotNil(
+      try r4.firstMatch(in: "e\u{301}") // base letter + combining accent is expected to match as well
+    )
+    XCTAssertNotNil(
+      try r4.firstMatch(in: "é")
+    )
+
+    let r5 = Regex { // base character followed by a combining scalar
+      "e"
+      "\u{301}" as UnicodeScalar
+    }
+    XCTAssertNotNil(try r5.firstMatch(in: "e\u{301}"))
+    XCTAssertNotNil(try r5.firstMatch(in: "é"))
+
+    let r6 = Regex { // the same idea with plain string literals
+      "abcde"
+      "\u{301}"
+    }
+    XCTAssertNotNil(try r6.firstMatch(in: "abcde\u{301}"))
+    XCTAssertNotNil(try r6.firstMatch(in: "abcdé"))
+
+    let r7 = Regex { // ...and with `Character` literals
+      "e" as Character
+      "\u{301}" as Character
+    }
+    XCTAssertNotNil(try r7.firstMatch(in: "e\u{301}"))
+    XCTAssertNotNil(try r7.firstMatch(in: "é"))
+
+    // You can't match a partial grapheme in grapheme semantic mode.
+    let r8 = Regex {
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👨" as UnicodeScalar
+      "\u{200D}" as UnicodeScalar
+      "👧" as UnicodeScalar
+    }
+    XCTAssertNil(try r8.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNil(try r8.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r8.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNil(try r8.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦")) // the input has two more scalars than r8 lists
+
+    // Scalar coalescing occurs across nested concatenations and literals.
+    let r9 = Regex {
+      Regex {
+        try! Regex(#"👨"#)
+        "\u{200D}" as UnicodeScalar
+        Regex {
+          "👨" as UnicodeScalar
+        }
+      }
+      Regex {
+        Regex {
+          "\u{200D}" as UnicodeScalar
+          "👧"
+        }
+        try! Regex(#"\u{200D}👦"#)
+      }
+    }
+    XCTAssertNotNil(try r9.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r9.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
+
+    let r10 = Regex { // the middle scalars come from a multi-scalar `\u{...}` escape in a parsed regex
+      "👨" as UnicodeScalar
+      try! Regex(#"\u{200D 1F468 200D 1F467}"#)
+      "\u{200D}" as UnicodeScalar
+      "👦" as UnicodeScalar
+    }
+    XCTAssertNotNil(try r10.firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.wholeMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).firstMatch(in: "👨‍👨‍👧‍👦"))
+    XCTAssertNotNil(try r10.matchingSemantics(.unicodeScalar).wholeMatch(in: "👨‍👨‍👧‍👦"))
+  }
+
   struct SemanticVersion: Equatable {
     var major: Int
     var minor: Int
diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift
index 4e64f7335..27f8d79cb 100644
--- a/Tests/RegexTests/CompileTests.swift
+++ b/Tests/RegexTests/CompileTests.swift
@@ -11,9 +11,135 @@
 
 @testable import _RegexParser
 @testable import _StringProcessing
+import TestSupport
 
 import XCTest
 
+/// A flattened, test-only view of `Instruction`: each opcode handled by
+/// `DecodedInstr.decode(_:)` has a case, and `match`, `matchScalar`, and
+/// `matchBitset` are further split by the flags in their payloads.
+enum DecodedInstr {
+  // Cases that mirror an opcode one-to-one. `decode(_:)` never returns
+  // `invalid`: it traps on that opcode instead.
+  case invalid
+  case moveImmediate, moveCurrentPosition
+  case branch, condBranchZeroElseDecrement, condBranchSamePosition
+  case save, saveAddress, splitSaving
+  case clear, clearThrough
+  case accept, fail
+  case advance
+  case consumeBy, assertBy, matchBy
+  case backreference
+  case beginCapture, endCapture
+  case transformCapture, captureValue
+  case builtinAssertion, builtinCharacterClass
+
+  // `match`, split on its case-insensitivity flag.
+  case match
+  case matchCaseInsensitive
+
+  // `matchScalar`, split on its case-insensitivity and boundary-check
+  // flags; "Unchecked" marks the variants without a boundary check.
+  case matchScalar
+  case matchScalarUnchecked
+  case matchScalarCaseInsensitive
+  case matchScalarCaseInsensitiveUnchecked
+
+  // `matchBitset`, split on its `isScalar` flag.
+  case matchBitset
+  case matchBitsetScalar
+}
+
+extension DecodedInstr {
+  /// Decodes `instruction` into the `DecodedInstr` case for its opcode.
+  ///
+  /// The `match`, `matchScalar`, and `matchBitset` opcodes are expanded into
+  /// distinct variants according to the flags carried in their payloads;
+  /// every other opcode maps to the case of the same name.
+  ///
+  /// Must stay in sync with Processor.cycle
+  ///
+  /// - Parameter instruction: The compiled instruction to classify.
+  /// - Returns: The case naming the opcode or its payload-specific variant.
+  /// - Precondition: The opcode is not `.invalid`; decoding one traps.
+  static func decode(_ instruction: Instruction) -> DecodedInstr {
+    let (opcode, payload) = instruction.destructure
+
+    switch opcode {
+    case .invalid:
+      fatalError("Invalid program")
+    case .moveImmediate:
+      return .moveImmediate
+    case .moveCurrentPosition:
+      return .moveCurrentPosition
+    case .branch:
+      return .branch
+    case .condBranchZeroElseDecrement:
+      return .condBranchZeroElseDecrement
+    case .condBranchSamePosition:
+      return .condBranchSamePosition
+    case .save:
+      return .save
+    case .saveAddress:
+      return .saveAddress
+    case .splitSaving:
+      return .splitSaving
+    case .clear:
+      return .clear
+    case .clearThrough:
+      return .clearThrough
+    case .accept:
+      return .accept
+    case .fail:
+      return .fail
+    case .advance:
+      return .advance
+    case .match:
+      // Split on the payload's case-insensitivity flag.
+      let (isCaseInsensitive, _) = payload.elementPayload
+      return isCaseInsensitive ? .matchCaseInsensitive : .match
+    case .matchScalar:
+      // Only the two flags matter; the first payload element is ignored.
+      // "Unchecked" variants are those whose boundary-check flag is false.
+      let (_, caseInsensitive, boundaryCheck) = payload.scalarPayload
+      switch (caseInsensitive, boundaryCheck) {
+      case (true, true):
+        return .matchScalarCaseInsensitive
+      case (true, false):
+        return .matchScalarCaseInsensitiveUnchecked
+      case (false, true):
+        return .matchScalar
+      case (false, false):
+        return .matchScalarUnchecked
+      }
+    case .matchBitset:
+      // Split on the payload's `isScalar` flag.
+      let (isScalar, _) = payload.bitsetPayload
+      return isScalar ? .matchBitsetScalar : .matchBitset
+    case .consumeBy:
+      return .consumeBy
+    case .assertBy:
+      return .assertBy
+    case .matchBy:
+      return .matchBy
+    case .backreference:
+      return .backreference
+    case .beginCapture:
+      return .beginCapture
+    case .endCapture:
+      return .endCapture
+    case .transformCapture:
+      return .transformCapture
+    case .captureValue:
+      return .captureValue
+    case .builtinAssertion:
+      return .builtinAssertion
+    case .builtinCharacterClass:
+      return .builtinCharacterClass
+    }
+  }
+}
+
 extension RegexTests {
 
   private func testCompilationEquivalence(
@@ -43,6 +169,45 @@ extension RegexTests {
     }
   }
 
+  /// Asserts that compiling `regex` throws `error`; any other outcome fails.
+  private func testCompileError(
+    _ regex: String, _ error: RegexCompilationError,
+    file: StaticString = #file, line: UInt = #line) {
+    do {
+      _ = try _compileRegex(regex)
+      XCTFail("Expected compile error", file: file, line: line)
+    } catch let err as RegexCompilationError {
+      XCTAssertEqual(err, error, file: file, line: line)
+    } catch {
+      XCTFail("Unknown compile error: \(error)", file: file, line: line)
+    }
+  }
+
+  func testInvalidScalarCoalescing() throws { // Range bounds that are not a single scalar must fail to compile.
+    guard ensureNewStdlib() else { return } // NOTE(review): presumably skips on older standard libraries — confirm in TestSupport
+
+    // Each lower bound below is a base scalar followed by combining scalars.
+    testCompileError(
+      #"[a\u{302}-✅]"#, .invalidCharacterClassRangeOperand("a\u{302}"))
+    testCompileError(
+      #"[e\u{301}-\u{302}]"#, .invalidCharacterClassRangeOperand("e\u{301}"))
+    testCompileError(
+      #"[\u{73}\u{323}\u{307}-\u{1E00}]"#,
+      .invalidCharacterClassRangeOperand("\u{73}\u{323}\u{307}"))
+    testCompileError(
+      #"[a\u{315}\u{301}-\u{302}]"#,
+      .invalidCharacterClassRangeOperand("a\u{315}\u{301}")
+    )
+    testCompileError( // the offending range sits among other class members
+      #"[a-z1e\u{301}-\u{302}\u{E1}3-59]"#,
+      .invalidCharacterClassRangeOperand("e\u{301}")
+    )
+    testCompileError( // ...and inside a nested class used in an intersection
+      #"[[e\u{301}-\u{302}]&&e\u{303}]"#,
+      .invalidCharacterClassRangeOperand("e\u{301}")
+    )
+  }
+
   func testCompileQuantification() throws {
 
     // NOTE: While we might change how we compile
@@ -147,16 +312,24 @@ extension RegexTests {
     for regex: String,
     syntax: SyntaxOptions = .traditional,
     semanticLevel: RegexSemanticLevel? = nil,
-    contains targets: Set<Instruction.OpCode>,
+    contains targets: Set<DecodedInstr> = [],
+    doesNotContain invalid: Set<DecodedInstr> = [],
     file: StaticString = #file,
     line: UInt = #line
   ) {
     do {
       let prog = try _compileRegex(regex, syntax, semanticLevel)
-      var found: Set<Instruction.OpCode> = []
+      var found: Set<DecodedInstr> = []
       for inst in prog.engine.instructions {
-        if targets.contains(inst.opcode) {
-          found.insert(inst.opcode)
+        let decoded = DecodedInstr.decode(inst)
+        found.insert(decoded)
+
+        if invalid.contains(decoded) {
+          XCTFail(
+            "Compiled regex '\(regex)' contains incorrect opcode \(decoded)",
+            file: file,
+            line: line)
+          return
         }
       }
 
@@ -174,38 +347,139 @@ extension RegexTests {
     }
   }
 
-  private func expectProgram(
-    for regex: String,
-    syntax: SyntaxOptions = .traditional,
-    semanticLevel: RegexSemanticLevel? = nil,
-    doesNotContain targets: Set<Instruction.OpCode>,
-    file: StaticString = #file,
-    line: UInt = #line
-  ) {
-    do {
-      let prog = try _compileRegex(regex, syntax, semanticLevel)
-      for inst in prog.engine.instructions {
-        if targets.contains(inst.opcode) {
-          XCTFail(
-            "Compiled regex '\(regex)' contains incorrect opcode \(inst.opcode)",
-            file: file,
-            line: line)
-          return
-        }
-      }
-    } catch {
-      XCTFail(
-        "Failed to compile regex '\(regex)': \(error)",
-        file: file,
-        line: line)
-    }
+  func testBitsetCompile() { // Small custom classes should compile to a bitset instruction, never `consumeBy`.
+    expectProgram( // default semantic level: the non-scalar bitset variant
+      for: "[abc]",
+      contains: [.matchBitset],
+      doesNotContain: [.consumeBy, .matchBitsetScalar])
+    expectProgram( // scalar semantics: the scalar bitset variant
+      for: "[abc]",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchBitsetScalar],
+      doesNotContain: [.matchBitset, .consumeBy])
+    expectProgram( // same expectations when members come from a `\Q...\E` sequence
+      for: #"[\Qab\Ec]"#,
+      contains: [.matchBitset],
+      doesNotContain: [.consumeBy, .matchBitsetScalar])
+    expectProgram(
+      for: #"[\Qab\Ec]"#,
+      semanticLevel: .unicodeScalar,
+      contains: [.matchBitsetScalar],
+      doesNotContain: [.matchBitset, .consumeBy])
   }
 
-  func testBitsetCompile() {
-    expectProgram(for: "[abc]", contains: [.matchBitset])
-    expectProgram(for: "[abc]", doesNotContain: [.consumeBy])
+  func testScalarOptimizeCompilation() { // Checks which match instructions literals compile to.
+    // All-ASCII quoted literal -> both checked and unchecked scalar matches are expected
+    expectProgram(
+      for: "abcd",
+      contains: [.matchScalar, .matchScalarUnchecked],
+      doesNotContain: [.match, .consumeBy])
+    // Single ASCII character -> matchScalar with boundary check only
+    expectProgram(
+      for: "a",
+      contains: [.matchScalar],
+      doesNotContain: [.match, .consumeBy, .matchScalarUnchecked])
+    // Quoted literal is not all ASCII -> match scalar when possible, always do boundary checks
+    expectProgram(
+      for: "aaa\u{301}",
+      contains: [.match, .matchScalar],
+      doesNotContain: [.consumeBy, .matchScalarUnchecked])
+    // Scalar mode -> always emit match scalar without boundary checks, ASCII or not
+    expectProgram(
+      for: "abcd",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchScalarUnchecked],
+      doesNotContain: [.match, .consumeBy, .matchScalar])
+    expectProgram(
+      for: "a",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchScalarUnchecked],
+      doesNotContain: [.match, .consumeBy, .matchScalar])
+    expectProgram(
+      for: "aaa\u{301}",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchScalarUnchecked],
+      doesNotContain: [.match, .consumeBy, .matchScalar])
+  }
+  
+  func testCaseInsensitivityCompilation() { // Checks which match instructions `(?i)` literals compile to.
+    // Quoted literal is all ASCII -> case-insensitive scalar matches, both
+    // with and without boundary checks
+    expectProgram(
+      for: "(?i)abcd",
+      contains: [.matchScalarCaseInsensitiveUnchecked, .matchScalarCaseInsensitive],
+      doesNotContain: [.match, .matchCaseInsensitive, .matchScalar, .matchScalarUnchecked])
+    // Quoted literal is all non-cased ASCII -> emit plain match scalar instructions
+    expectProgram(
+      for: "(?i)&&&&",
+      contains: [.matchScalar, .matchScalarUnchecked],
+      doesNotContain: [.match, .matchCaseInsensitive,
+        .matchScalarCaseInsensitive, .matchScalarCaseInsensitiveUnchecked])
+    // Quoted literal is not all ASCII -> match scalar case insensitive when
+    // possible, match character case insensitive when needed, always perform
+    // boundary check
+    expectProgram(
+      for: "(?i)abcd\u{301}",
+      contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive],
+      doesNotContain: [.matchScalarCaseInsensitiveUnchecked, .match, .matchScalar])
+    // Same as before but with ASCII non-cased characters -> emit matchScalar for them
+    expectProgram(
+      for: "(?i)abcd\u{301};.'!",
+      contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive, .matchScalar],
+      doesNotContain: [.matchScalarCaseInsensitiveUnchecked, .match])
+    // Also contains a non-ASCII non-cased character -> emit match for it
+    expectProgram(
+      for: "(?i)abcd\u{301};.'!💖",
+      contains: [.matchCaseInsensitive, .matchScalarCaseInsensitive, .matchScalar, .match],
+      doesNotContain: [.matchScalarCaseInsensitiveUnchecked])
+    
+    // Scalar mode -> emit unchecked scalar matches only; the case-insensitive
+    // variant is expected only when the literal contains cased scalars
+    expectProgram(
+      for: "(?i);.'!💖",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchScalarUnchecked],
+      doesNotContain: [.matchScalarCaseInsensitiveUnchecked])
+    expectProgram(
+      for: "(?i)abcdé",
+      semanticLevel: .unicodeScalar,
+      contains: [.matchScalarCaseInsensitiveUnchecked],
+      doesNotContain: [.matchScalarUnchecked])
+  }
+
+  func testQuantificationForwardProgressCompile() {
+    // Unbounded quantification over inner nodes that may not make forward
+    // progress: expect the position-checking instructions to be emitted.
+    expectProgram(for: #"(?:(?=a)){1,}"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\b)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:(?#comment))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:|)+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|)+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?i-i:))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?#comment))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?#comment)(?i-i:))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?i))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
 
-    expectProgram(for: "[abc]", semanticLevel: .unicodeScalar, doesNotContain: [.matchBitset])
-    expectProgram(for: "[abc]", semanticLevel: .unicodeScalar, contains: [.consumeBy])
+    // Bounded quantification: position checking must not be emitted.
+    expectProgram(for: #"(?:(?=a)){1,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\b)?"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:(?#comment)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:|){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?i-i:)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?#comment)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?#comment)(?i-i:)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(?:\w|(?i)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+  
+    // Inner node is itself a quantification that does not guarantee forward progress.
+    expectProgram(for: #"(a*)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(a?)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(a{,5})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"((\b){,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"((\b){1,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"((|){1,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
+    // Inner node is a quantification that guarantees forward progress: no position checking.
+    expectProgram(for: #"(a+)*"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
+    expectProgram(for: #"(a{1,})*"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
   }
 }
diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift
index d375065ab..8e01582a9 100644
--- a/Tests/RegexTests/MatchTests.swift
+++ b/Tests/RegexTests/MatchTests.swift
@@ -12,6 +12,7 @@
 import XCTest
 @testable import _RegexParser
 @testable import _StringProcessing
+import TestSupport
 
 struct MatchError: Error {
   var message: String
@@ -24,24 +25,35 @@ func _firstMatch(
   _ regexStr: String,
   input: String,
   validateOptimizations: Bool,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   syntax: SyntaxOptions = .traditional
-) throws -> (String, [String?]) {
-  var regex = try Regex(regexStr, syntax: syntax)
-  guard let result = try regex.firstMatch(in: input) else {
-    throw MatchError("match not found for \(regexStr) in \(input)")
-  }
-  let caps = result.output.slices(from: input)
-  
+) throws -> (String, [String?])? {
+  var regex = try Regex(regexStr, syntax: syntax).matchingSemantics(semanticLevel)
+  let result = try regex.firstMatch(in: input)
+
   if validateOptimizations {
     regex._setCompilerOptionsForTesting(.disableOptimizations)
-    guard let unoptResult = try regex.firstMatch(in: input) else {
+    let unoptResult = try regex.firstMatch(in: input)
+    if result != nil && unoptResult == nil {
       throw MatchError("match not found for unoptimized \(regexStr) in \(input)")
     }
-    XCTAssertEqual(
-      String(input[result.range]),
-      String(input[unoptResult.range]),
-      "Unoptimized regex returned a different result")
+    if result == nil && unoptResult != nil {
+      throw MatchError("match not found in optimized \(regexStr) in \(input)")
+    }
+    if let result = result, let unoptResult = unoptResult {
+      let optMatch = String(input[result.range])
+      let unoptMatch = String(input[unoptResult.range])
+      if optMatch != unoptMatch {
+        throw MatchError("""
+
+        Unoptimized regex returned: '\(unoptMatch)'
+        Optimized regex returned: '\(optMatch)'
+        """)
+      }
+    }
   }
+  guard let result = result else { return nil }
+  let caps = result.output.slices(from: input)
   return (String(input[result.range]), caps.map { $0.map(String.init) })
 }
 
@@ -54,6 +66,7 @@ func flatCaptureTest(
   dumpAST: Bool = false,
   xfail: Bool = false,
   validateOptimizations: Bool = true,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   file: StaticString = #file,
   line: UInt = #line
 ) {
@@ -63,6 +76,7 @@ func flatCaptureTest(
         regex,
         input: test,
         validateOptimizations: validateOptimizations,
+        semanticLevel: semanticLevel,
         syntax: syntax
       ) else {
         if expect == nil {
@@ -113,6 +127,7 @@ func matchTest(
   dumpAST: Bool = false,
   xfail: Bool = false,
   validateOptimizations: Bool = true,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   file: StaticString = #file,
   line: UInt = #line
 ) {
@@ -126,6 +141,7 @@ func matchTest(
       dumpAST: dumpAST,
       xfail: xfail,
       validateOptimizations: validateOptimizations,
+      semanticLevel: semanticLevel,
       file: file,
       line: line)
   }
@@ -143,25 +159,25 @@ func firstMatchTest(
   dumpAST: Bool = false,
   xfail: Bool = false,
   validateOptimizations: Bool = true,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   file: StaticString = #filePath,
   line: UInt = #line
 ) {
   do {
-    let (found, _) = try _firstMatch(
+    let found = try _firstMatch(
       regex,
       input: input,
       validateOptimizations: validateOptimizations,
-      syntax: syntax)
+      semanticLevel: semanticLevel,
+      syntax: syntax)?.0
 
     if xfail {
       XCTAssertNotEqual(found, match, file: file, line: line)
     } else {
-      XCTAssertEqual(found, match, file: file, line: line)
+      XCTAssertEqual(found, match, "Incorrect match", file: file, line: line)
     }
   } catch {
-    // FIXME: This allows non-matches to succeed even when xfail'd
-    // When xfail == true, this should report failure for match == nil
-    if !xfail && match != nil {
+    if !xfail {
       XCTFail("\(error)", file: file, line: line)
     }
     return
@@ -175,6 +191,7 @@ func firstMatchTests(
   enableTracing: Bool = false,
   dumpAST: Bool = false,
   xfail: Bool = false,
+  semanticLevel: RegexSemanticLevel = .graphemeCluster,
   file: StaticString = #filePath,
   line: UInt = #line
 ) {
@@ -187,6 +204,7 @@ func firstMatchTests(
       enableTracing: enableTracing,
       dumpAST: dumpAST,
       xfail: xfail,
+      semanticLevel: semanticLevel,
       file: file,
       line: line)
   }
@@ -296,6 +314,55 @@ extension RegexTests {
       match: "\u{006f}\u{031b}\u{0323}"
     )
 
+    // e + combining accents
+    firstMatchTest(
+      #"e\u{301 302 303}"#,
+      input: "e\u{301}\u{302}\u{303}",
+      match: "e\u{301}\u{302}\u{303}"
+    )
+    firstMatchTest(
+      #"e\u{315 35C 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{301}\u{302 303}"#,
+      input: "e\u{301}\u{302}\u{303}",
+      match: "e\u{301}\u{302}\u{303}"
+    )
+    firstMatchTest(
+      #"e\u{35C}\u{315 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{35C}\u{315 301}"#,
+      input: "e\u{315}\u{301}\u{35C}",
+      match: "e\u{315}\u{301}\u{35C}"
+    )
+    firstMatchTest(
+      #"e\u{301}\de\u{302}"#,
+      input: "e\u{301}0e\u{302}",
+      match: "e\u{301}0e\u{302}"
+    )
+    firstMatchTest(
+      #"(?x) e \u{35C} \u{315}(?#hello)\u{301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+    firstMatchTest(
+      #"(?x) e \u{35C} \u{315 301}"#,
+      input: "e\u{301}\u{315}\u{35C}",
+      match: "e\u{301}\u{315}\u{35C}"
+    )
+
+    // We don't coalesce across groups.
+    firstMatchTests(
+      #"e\u{301}(?:\u{315}\u{35C})?"#,
+      ("e\u{301}", "e\u{301}"),
+      ("e\u{301}\u{315}\u{35C}", nil)
+    )
+
     // Escape sequences that represent scalar values.
     firstMatchTest(#"\a[\b]\e\f\n\r\t"#,
                    input: "\u{7}\u{8}\u{1B}\u{C}\n\r\t",
@@ -304,8 +371,6 @@ extension RegexTests {
                    input: "\u{7}\u{8}\u{1B}\u{C}\n\r\t",
                    match: "\u{7}\u{8}\u{1B}\u{C}\n\r\t")
 
-    firstMatchTest(#"\r\n"#, input: "\r\n", match: "\r\n")
-
     // MARK: Quotes
 
     firstMatchTest(
@@ -421,8 +486,7 @@ extension RegexTests {
       "a++a",
       ("babc", nil),
       ("baaabc", nil),
-      ("bb", nil),
-      xfail: true)
+      ("bb", nil))
     firstMatchTests(
       "a+?a",
       ("babc", nil),
@@ -498,23 +562,19 @@ extension RegexTests {
       ("baabc", nil),
       ("bb", nil))
     
-    // XFAIL'd versions of the above
     firstMatchTests(
       "a{2,4}+a",
-      ("baaabc", nil),
-      xfail: true)
+      ("baaabc", nil))
     firstMatchTests(
       "a{,4}+a",
       ("babc", nil),
       ("baabc", nil),
-      ("baaabc", nil),
-      xfail: true)
+      ("baaabc", nil))
     firstMatchTests(
       "a{2,}+a",
       ("baaabc", nil),
       ("baaaaabc", nil),
-      ("baaaaaaaabc", nil),
-      xfail: true)
+      ("baaaaaaaabc", nil))
 
     // XFAIL'd possessive tests
     firstMatchTests(
@@ -561,6 +621,9 @@ extension RegexTests {
   }
 
   func testMatchCharacterClasses() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Character classes
 
     firstMatchTest(#"abc\d"#, input: "xyzabc123", match: "abc1")
@@ -596,6 +659,12 @@ extension RegexTests {
               ("A", true),
               ("a", false))
 
+    matchTest(#"(?i)[a]"#,
+              ("💿", false),
+              ("a\u{301}", false),
+              ("A", true),
+              ("a", true))
+
     matchTest("[a]",
       ("a\u{301}", false))
 
@@ -610,14 +679,12 @@ extension RegexTests {
     // interpreted as matching the scalars "\r" or "\n".
     // It does not fully match the character "\r\n" because the character class
     // in scalar mode will only match one scalar
-    do {
-      let regex = try Regex("[\r\n]").matchingSemantics(.unicodeScalar)
-      XCTAssertEqual("\r", try regex.wholeMatch(in: "\r")?.0)
-      XCTAssertEqual("\n", try regex.wholeMatch(in: "\n")?.0)
-      XCTAssertEqual(nil, try regex.wholeMatch(in: "\r\n")?.0)
-    } catch {
-      XCTFail("\(error)", file: #filePath, line: #line)
-    }
+    matchTest(
+      "^[\r\n]$",
+      ("\r", true),
+      ("\n", true),
+      ("\r\n", false),
+      semanticLevel: .unicodeScalar)
 
     matchTest("[^\r\n]",
       ("\r\n", false),
@@ -625,7 +692,385 @@ extension RegexTests {
       ("\r", true))
     matchTest("[\n\r]",
       ("\n", true),
-      ("\r", true))
+      ("\r", true),
+      ("\r\n", false))
+    
+    matchTest(
+      #"[a]\u0301"#,
+      ("a\u{301}", false),
+      semanticLevel: .graphemeCluster)
+    matchTest(
+      #"[a]\u0301"#,
+      ("a\u{301}", true),
+      semanticLevel: .unicodeScalar)
+
+    let allNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n\u{85}\u{2028}\u{2029}"
+    let asciiNewlines = "\u{A}\u{B}\u{C}\u{D}\r\n"
+
+    for level in [RegexSemanticLevel.graphemeCluster, .unicodeScalar] {
+      firstMatchTest(
+        #"\R+"#,
+        input: "abc\(allNewlines)def", match: allNewlines,
+        semanticLevel: level
+      )
+      firstMatchTest(
+        #"\v+"#,
+        input: "abc\(allNewlines)def", match: allNewlines,
+        semanticLevel: level
+      )
+    }
+
+    // In scalar mode, \R can match \r\n, \v cannot.
+    firstMatchTest(
+      #"\R"#, input: "\r\n", match: "\r\n", semanticLevel: .unicodeScalar)
+    firstMatchTest(
+      #"\v"#, input: "\r\n", match: "\r", semanticLevel: .unicodeScalar)
+    firstMatchTest(
+      #"\v\v"#, input: "\r\n", match: "\r\n", semanticLevel: .unicodeScalar)
+    firstMatchTest(
+      #"[^\v]"#, input: "\r\n", match: nil, semanticLevel: .unicodeScalar)
+
+    // ASCII-only spaces.
+    firstMatchTest(#"(?S)\R+"#, input: allNewlines, match: asciiNewlines)
+    firstMatchTest(#"(?S)\v+"#, input: allNewlines, match: asciiNewlines)
+    firstMatchTest(
+      #"(?S)\R"#, input: "\r\n", match: "\r\n", semanticLevel: .unicodeScalar)
+    firstMatchTest(
+      #"(?S)\v"#, input: "\r\n", match: "\r", semanticLevel: .unicodeScalar)
+
+    matchTest(
+      #"[a]\u0301"#,
+      ("a\u{301}", false),
+      semanticLevel: .graphemeCluster)
+    matchTest(
+      #"[a]\u0301"#,
+      ("a\u{301}", true),
+      semanticLevel: .unicodeScalar)
+
+    // Scalar matching in quoted sequences.
+    firstMatchTests(
+      "[\\Qe\u{301}\\E]",
+      ("e", nil),
+      ("E", nil),
+      ("\u{301}", nil),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("E\u{301}", nil),
+      ("\u{C9}", nil)
+    )
+    firstMatchTests(
+      "[\\Qe\u{301}\\E]",
+      ("e", "e"),
+      ("E", nil),
+      ("\u{301}", "\u{301}"),
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("E\u{301}", "\u{301}"),
+      ("\u{C9}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      "(?i)[\\Qe\u{301}\\E]",
+      ("e", nil),
+      ("E", nil),
+      ("\u{301}", nil),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("E\u{301}", "E\u{301}"),
+      ("\u{C9}", "\u{C9}")
+    )
+    firstMatchTests(
+      "(?i)[\\Qe\u{301}\\E]",
+      ("e", "e"),
+      ("E", "E"),
+      ("\u{301}", "\u{301}"),
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("E\u{301}", "E"),
+      ("\u{C9}", nil),
+      semanticLevel: .unicodeScalar
+    )
+
+    // Scalar coalescing.
+    firstMatchTests(
+      #"[e\u{301}]"#,
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("e", nil),
+      ("\u{301}", nil)
+    )
+    firstMatchTests(
+      #"[e\u{301}]"#,
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[[[e\u{301}]]]"#,
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("e", nil),
+      ("\u{301}", nil)
+    )
+    firstMatchTests(
+      #"[[[e\u{301}]]]"#,
+      (eDecomposed, "e"),
+      (eComposed, nil),
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[👨\u{200D}👩\u{200D}👧\u{200D}👦]"#,
+      ("👨", nil),
+      ("👩", nil),
+      ("👧", nil),
+      ("👦", nil),
+      ("\u{200D}", nil),
+      ("👨‍👩‍👧‍👦", "👨‍👩‍👧‍👦")
+    )
+    firstMatchTests(
+      #"[👨\u{200D}👩\u{200D}👧\u{200D}👦]"#,
+      ("👨", "👨"),
+      ("👩", "👩"),
+      ("👧", "👧"),
+      ("👦", "👦"),
+      ("\u{200D}", "\u{200D}"),
+      ("👨‍👩‍👧‍👦", "👨"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[e\u{315}\u{301}\u{35C}]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{301}", nil),
+      ("e\u{315}\u{301}\u{35C}", "e\u{315}\u{301}\u{35C}"),
+      ("e\u{301}\u{315}\u{35C}", "e\u{301}\u{315}\u{35C}"),
+      ("e\u{35C}\u{301}\u{315}", "e\u{35C}\u{301}\u{315}")
+    )
+    firstMatchTests(
+      #"(?x) [ e \u{315} \u{301} \u{35C} ]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{301}", nil),
+      ("e\u{315}\u{301}\u{35C}", "e\u{315}\u{301}\u{35C}"),
+      ("e\u{301}\u{315}\u{35C}", "e\u{301}\u{315}\u{35C}"),
+      ("e\u{35C}\u{301}\u{315}", "e\u{35C}\u{301}\u{315}")
+    )
+
+    // We don't coalesce across character classes.
+    firstMatchTests(
+      #"e[\u{315}\u{301}\u{35C}]"#,
+      ("e", nil),
+      ("e\u{315}", nil),
+      ("e\u{315}\u{301}", nil),
+      ("e\u{301}\u{315}\u{35C}", nil)
+    )
+    firstMatchTests(
+      #"[e[\u{301}]]"#,
+      ("e", "e"),
+      ("\u{301}", "\u{301}"),
+      ("e\u{301}", nil)
+    )
+
+    firstMatchTests(
+      #"[a-z1\u{E9}-\u{302}\u{E1}3-59]"#,
+      ("a", "a"),
+      ("a\u{301}", "a\u{301}"),
+      ("\u{E1}", "\u{E1}"),
+      ("\u{E2}", nil),
+      ("z", "z"),
+      ("e", "e"),
+      (eDecomposed, eDecomposed),
+      (eComposed, eComposed),
+      ("\u{302}", "\u{302}"),
+      ("1", "1"),
+      ("2", nil),
+      ("3", "3"),
+      ("4", "4"),
+      ("5", "5"),
+      ("6", nil),
+      ("7", nil),
+      ("8", nil),
+      ("9", "9")
+    )
+    firstMatchTests(
+      #"[ab-df-hik-lm]"#,
+      ("a", "a"),
+      ("b", "b"),
+      ("c", "c"),
+      ("d", "d"),
+      ("e", nil),
+      ("f", "f"),
+      ("g", "g"),
+      ("h", "h"),
+      ("i", "i"),
+      ("j", nil),
+      ("k", "k"),
+      ("l", "l"),
+      ("m", "m")
+    )
+    firstMatchTests(
+      #"[a-ce-fh-j]"#,
+      ("a", "a"),
+      ("b", "b"),
+      ("c", "c"),
+      ("d", nil),
+      ("e", "e"),
+      ("f", "f"),
+      ("g", nil),
+      ("h", "h"),
+      ("i", "i"),
+      ("j", "j")
+    )
+
+
+    // These can't compile in grapheme semantic mode, but make sure they work in
+    // scalar semantic mode.
+    firstMatchTests(
+      #"[a\u{315}\u{301}-\u{302}]"#,
+      ("a", "a"),
+      ("\u{315}", "\u{315}"),
+      ("\u{301}", "\u{301}"),
+      ("\u{302}", "\u{302}"),
+      ("\u{303}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[\u{73}\u{323}\u{307}-\u{1E00}]"#,
+      ("\u{73}", "\u{73}"),
+      ("\u{323}", "\u{323}"),
+      ("\u{307}", "\u{307}"),
+      ("\u{400}", "\u{400}"),
+      ("\u{500}", "\u{500}"),
+      ("\u{1E00}", "\u{1E00}"),
+      ("\u{1E01}", nil),
+      ("\u{1E69}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[a\u{302}-✅]"#,
+      ("a", "a"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", nil),
+      ("a\u{301}", "a"),
+      ("\u{E1}", nil),
+      ("a\u{302}", "a"),
+      ("\u{E2}", nil),
+      ("\u{E3}", nil),
+      ("\u{EF}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "\u{302}"),
+      ("\u{2705}", "\u{2705}"),
+      ("✅", "✅"),
+      ("\u{376}", "\u{376}"),
+      ("\u{850}", "\u{850}"),
+      ("a\u{302}\u{315}", "a"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"(?i)[a\u{302}-✅]"#,
+      ("a", "a"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "A"),
+      ("E\u{301}", nil),
+      ("a\u{301}", "a"),
+      ("\u{E1}", nil),
+      ("a\u{302}", "a"),
+      ("\u{E2}", nil),
+      ("\u{E3}", nil),
+      ("\u{EF}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "\u{302}"),
+      ("\u{2705}", "\u{2705}"),
+      ("✅", "✅"),
+      ("\u{376}", "\u{376}"),
+      ("\u{850}", "\u{850}"),
+      ("a\u{302}\u{315}", "a"),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"[e\u{301}-\u{302}]"#,
+      ("a", nil),
+      ("e", "e"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", "\u{301}"),
+      ("\u{C8}", nil),
+      ("\u{C9}", nil),
+      ("\u{CA}", nil),
+      ("\u{CB}", nil),
+      ("a\u{301}", "\u{301}"),
+      ("a\u{302}", "\u{302}"),
+      ("e\u{301}", "e"),
+      ("e\u{302}", "e"),
+      ("\u{E1}", nil),
+      ("\u{E2}", nil),
+      ("\u{E9}", nil),
+      ("\u{EA}", nil),
+      ("\u{EF}", nil),
+      semanticLevel: .unicodeScalar
+    )
+    firstMatchTests(
+      #"(?i)[e\u{301}-\u{302}]"#,
+      ("a", nil),
+      ("e", "e"),
+      ("\u{302}", "\u{302}"),
+      ("A\u{302}", "\u{302}"),
+      ("E\u{301}", "E"),
+      ("\u{C8}", nil),
+      ("\u{C9}", nil),
+      ("\u{CA}", nil),
+      ("\u{CB}", nil),
+      ("a\u{301}", "\u{301}"),
+      ("a\u{302}", "\u{302}"),
+      ("e\u{301}", "e"),
+      ("e\u{302}", "e"),
+      ("\u{E1}", nil),
+      ("\u{E2}", nil),
+      ("\u{E9}", nil),
+      ("\u{EA}", nil),
+      ("\u{EF}", nil),
+      semanticLevel: .unicodeScalar
+    )
+
+    // Set operation scalar coalescing.
+    firstMatchTests(
+      #"[e\u{301}&&e\u{301}e\u{302}]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("e\u{301}", "e\u{301}"),
+      ("e\u{302}", nil))
+    firstMatchTests(
+      #"[e\u{301}~~[[e\u{301}]e\u{302}]]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", "e\u{302}"))
+    firstMatchTests(
+      #"[e\u{301}[e\u{303}]--[[e\u{301}]e\u{302}]]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("\u{303}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", nil),
+      ("e\u{303}", "e\u{303}"))
+
+    firstMatchTests(
+      #"(?x) [ e \u{301} [ e \u{303} ] -- [ [ e \u{301} ] e \u{302} ] ]"#,
+      ("e", nil),
+      ("\u{301}", nil),
+      ("\u{302}", nil),
+      ("\u{303}", nil),
+      ("e\u{301}", nil),
+      ("e\u{302}", nil),
+      ("e\u{303}", "e\u{303}"))
 
     firstMatchTest("[-]", input: "123-abcxyz", match: "-")
 
@@ -709,6 +1154,15 @@ extension RegexTests {
     }
     firstMatchTest(#"[\t-\t]"#, input: "\u{8}\u{A}\u{9}", match: "\u{9}")
 
+    firstMatchTest(#"[12]"#, input: "1️⃣", match: nil)
+    firstMatchTest(#"[1-2]"#, input: "1️⃣", match: nil)
+    firstMatchTest(#"[\d]"#, input: "1️⃣", match: "1️⃣")
+    firstMatchTest(#"(?P)[\d]"#, input: "1️⃣", match: nil)
+    firstMatchTest("[0-2&&1-3]", input: "1️⃣", match: nil)
+    firstMatchTest("[1-2e\u{301}]", input: "1️⃣", match: nil)
+
+    firstMatchTest(#"[\u{3A9}-\u{3A9}]"#, input: "\u{3A9}", match: "\u{3A9}")
+
     // Currently not supported in the matching engine.
     for c: UnicodeScalar in ["a", "b", "c"] {
       firstMatchTest(#"[\c!-\C-#]"#, input: "def\(c)", match: "\(c)",
@@ -762,6 +1216,35 @@ extension RegexTests {
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: "abc",
                    syntax: .experimental)
     firstMatchTest(#"["abc"]+"#, input: #""abc""#, match: #""abc""#)
+
+    for semantics in [RegexSemanticLevel.unicodeScalar, .graphemeCluster] {
+      // Case sensitivity and ranges; every check runs at the current level.
+      for ch in "abcD" {
+        firstMatchTest("[a-cD]", input: String(ch), match: String(ch), semanticLevel: semantics)
+      }
+      for ch in "ABCd" {
+        firstMatchTest("[a-cD]", input: String(ch), match: nil, semanticLevel: semantics)
+      }
+      for ch in "abcABCdD" {
+        let input = String(ch)
+        firstMatchTest(
+          "(?i)[a-cd]", input: input, match: input, semanticLevel: semantics)
+        firstMatchTest(
+          "(?i)[A-CD]", input: input, match: input, semanticLevel: semantics)
+      }
+      for ch in "XYZ[\\]^_`abcd" {
+        let input = String(ch)
+        firstMatchTest(
+          "[X-cd]", input: input, match: input, semanticLevel: semantics)
+      }
+      for ch in "XYZ[\\]^_`abcxyzABCdD" {
+        let input = String(ch)
+        firstMatchTest(
+          "(?i)[X-cd]", input: input, match: input, semanticLevel: semantics)
+        firstMatchTest(
+          "(?i)[X-cD]", input: input, match: input, semanticLevel: semantics)
+      }
+    }
   }
 
   func testCharacterProperties() {
@@ -974,6 +1457,9 @@ extension RegexTests {
   }
 
   func testMatchAnchors() throws {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Anchors
     firstMatchTests(
       #"^\d+"#,
@@ -1022,8 +1508,6 @@ extension RegexTests {
       (" 123\n456\n", nil),
       ("123 456", "456"))
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     firstMatchTests(
       #"\d+\b"#,
       ("123", "123"),
@@ -1041,7 +1525,6 @@ extension RegexTests {
       ("123", "23"),
       (" 123", "23"),
       ("123 456", "23"))
-#endif
 
     // TODO: \G and \K
     do {
@@ -1054,8 +1537,8 @@ extension RegexTests {
     // TODO: Oniguruma \y and \Y
     firstMatchTests(
       #"\u{65}"#,             // Scalar 'e' is present in both
-      ("Cafe\u{301}", nil),   // but scalar mode requires boundary at end of match
-      xfail: true)
+      ("Cafe\u{301}", nil))   // but scalar mode requires boundary at end of match
+
     firstMatchTests(
       #"\u{65}"#,             // Scalar 'e' is present in both
       ("Sol Cafe", "e"))      // standalone is okay
@@ -1072,9 +1555,10 @@ extension RegexTests {
       ("Sol Cafe", nil), xfail: true)
   }
 
-  // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
   func testLevel2WordBoundaries() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Level 2 Word Boundaries
     firstMatchTest(#"\b😊\b"#, input: "🔥😊👍", match: "😊")
     firstMatchTest(#"\b👨🏽\b"#, input: "👩🏻👶🏿👨🏽🧑🏾👩🏼", match: "👨🏽")
@@ -1090,9 +1574,11 @@ extension RegexTests {
     firstMatchTest(#"can\B\'\Bt"#, input: "I can't do that.", match: "can't")
     firstMatchTest(#"\b÷\b"#, input: "3 ÷ 3 = 1", match: "÷")
   }
-#endif
-  
+
   func testMatchGroups() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // MARK: Groups
 
     // Named captures
@@ -1316,6 +1802,9 @@ extension RegexTests {
   }
   
   func testMatchExamples() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // Backreferences
     matchTest(
       #"(sens|respons)e and \1ibility"#,
@@ -1365,8 +1854,6 @@ extension RegexTests {
       xfail: true
     )
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     // HTML tags
     matchTest(
       #"<([a-zA-Z][a-zA-Z0-9]*)\b[^>]*>.*?</\1>"#,
@@ -1384,7 +1871,6 @@ extension RegexTests {
       ("pass me the the kettle", ["the"]),
       ("this doesn't have any", nil)
     )
-#endif
 
     // Floats
     flatCaptureTest(
@@ -1400,8 +1886,79 @@ extension RegexTests {
     firstMatchTest(#".+"#, input: "a\nb", match: "a")
     firstMatchTest(#"(?s:.+)"#, input: "a\nb", match: "a\nb")
   }
+
+  func testMatchNewlines() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
+    for semantics in [RegexSemanticLevel.unicodeScalar, .graphemeCluster] {
+      firstMatchTest(
+        #"\r\n"#, input: "\r\n", match: "\r\n",
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\r\n"#, input: "\n", match: nil, semanticLevel: semantics)
+      firstMatchTest(
+        #"\r\n"#, input: "\r", match: nil, semanticLevel: semantics)
+
+      // Anchored: "\r\n" is not treated as ASCII, nor as a lone "\r" or "\n".
+      firstMatchTest(
+        #"^\p{ASCII}$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^\r$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\r]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^\n$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\n]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"^[\u{0}-\u{7F}]$"#, input: "\r\n", match: nil,
+        semanticLevel: semantics
+      )
+      // Unanchored: only scalar semantics may match one scalar of "\r\n".
+      let scalarSemantics = semantics == .unicodeScalar
+      firstMatchTest(
+        #"\p{ASCII}"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\r"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\r]"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"\n"#, input: "\r\n", match:  scalarSemantics ? "\n" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\n]"#, input: "\r\n", match:  scalarSemantics ? "\n" : nil,
+        semanticLevel: semantics
+      )
+      firstMatchTest(
+        #"[\u{0}-\u{7F}]"#, input: "\r\n", match:  scalarSemantics ? "\r" : nil,
+        semanticLevel: semantics
+      )
+    }
+  }
   
   func testCaseSensitivity() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     matchTest(
       #"c..e"#,
       ("cafe", true),
@@ -1464,6 +2021,9 @@ extension RegexTests {
   }
   
   func testASCIIClasses() {
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     // 'D' ASCII-only digits
     matchTest(
       #"\d+"#,
@@ -1492,8 +2052,6 @@ extension RegexTests {
       ("aeiou", true),
       ("åe\u{301}ïôú", false))
 
-    // FIXME: Keep this until _wordIndex and friends are
-#if os(Linux)
     matchTest(
       #"abcd\b.+"#,
       ("abcd ef", true),
@@ -1509,7 +2067,6 @@ extension RegexTests {
       ("abcd ef", true),
       ("abcdef", false),
       ("abcdéf", false))
-#endif
 
     // 'S' ASCII-only spaces
     matchTest(
@@ -1635,6 +2192,9 @@ extension RegexTests {
   var eComposed: String { "é" }
   var eDecomposed: String { "e\u{301}" }
   
+  var eComposedUpper: String { "É" }  // uppercase of eComposed; presumably precomposed U+00C9 — confirm
+  var eDecomposedUpper: String { "E\u{301}" }  // "E" followed by the scalar U+0301
+
   func testIndividualScalars() {
     // Expectation: A standalone Unicode scalar value in a regex literal
     // can match either that specific scalar value or participate in matching
@@ -1647,19 +2207,15 @@ extension RegexTests {
     firstMatchTest(#"\u{65 301}$"#, input: eComposed, match: eComposed)
 
     // FIXME: Implicit \y at end of match
-    firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil,
-      xfail: true)
+    firstMatchTest(#"\u{65}"#, input: eDecomposed, match: nil)
     firstMatchTest(#"\u{65}$"#, input: eDecomposed, match: nil)
-    // FIXME: \y is unsupported
-    firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil,
-      xfail: true)
+    firstMatchTest(#"\u{65}\y"#, input: eDecomposed, match: nil)
 
     // FIXME: Unicode scalars are only matched at the start of a grapheme cluster
     firstMatchTest(#"\u{301}"#, input: eDecomposed, match: "\u{301}",
       xfail: true)
-    // FIXME: \y is unsupported
-    firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil,
-      xfail: true)
+
+    firstMatchTest(#"\y\u{301}"#, input: eDecomposed, match: nil)
   }
 
   func testCanonicalEquivalence() throws {
@@ -1681,6 +2237,16 @@ extension RegexTests {
       #"e$"#,
       (eComposed, false),
       (eDecomposed, false))
+
+    matchTest(
+      #"\u{65 301}"#,
+      (eComposed, true),
+      (eDecomposed, true))
+
+    matchTest(
+      #"(?x) \u{65} \u{301}"#,
+      (eComposed, true),
+      (eDecomposed, true))
   }
 
   func testCanonicalEquivalenceCharacterClass() throws {
@@ -1717,41 +2283,70 @@ extension RegexTests {
     // \s
     firstMatchTest(#"\s"#, input: " ", match: " ")
     // FIXME: \s shouldn't match a number composed with a non-number character
-    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
-              xfail: true)
+    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
     // \p{Whitespace}
     firstMatchTest(#"\s"#, input: " ", match: " ")
-    // FIXME: \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
-    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil,
-              xfail: true)
+    // \p{Whitespace} shouldn't match whitespace composed with a non-whitespace character
+    firstMatchTest(#"\s\u{305}"#, input: " ", match: nil)
   }
   
   func testCanonicalEquivalenceCustomCharacterClass() throws {
-    // Expectation: Concatenations with custom character classes should be able
-    // to match within a grapheme cluster. That is, a regex should be able to
-    // match the scalar values that comprise a grapheme cluster in separate,
-    // or repeated, custom character classes.
-    
+    // Expectation: Custom character class matches do not cross grapheme
+    // character boundaries by default. When matching with Unicode scalar
+    // semantics, grapheme cluster boundaries are ignored, so matching
+    // sequences of custom character classes can succeed.
+
+    // Must have new stdlib for character class ranges and word boundaries.
+    guard ensureNewStdlib() else { return }
+
     matchTest(
       #"[áéíóú]$"#,
       (eComposed, true),
       (eDecomposed, true))
 
-    // FIXME: Custom char classes don't use canonical equivalence with composed characters
-    firstMatchTest(#"e[\u{301}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
-    firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
-    firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: eComposed, match: eComposed,
-              xfail: true)
+    for input in [eDecomposed, eComposed] {
+      // Unicode scalar semantics means that only the decomposed version can
+      // match here.
+      let match = input.unicodeScalars.count == 2 ? input : nil
+      firstMatchTest(
+        #"e[\u{301}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"e[\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[e][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[e-e][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+      firstMatchTest(
+        #"[a-z][\u{300}-\u{320}]$"#, input: input, match: match,
+        semanticLevel: .unicodeScalar)
+    }
+    for input in [eComposed, eDecomposed] {
+      // Grapheme cluster semantics means that we can't match the 'e' separately
+      // from the accent.
+      firstMatchTest(#"e[\u{301}]$"#, input: input, match: nil)
+      firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[e][\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[e-e][\u{300}-\u{320}]$"#, input: input, match: nil)
+      firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: input, match: nil)
+
+      // A range that covers é (U+E9). Inputs are mapped to NFC, so match.
+      firstMatchTest(#"[\u{E8}-\u{EA}]"#, input: input, match: input)
+    }
 
-    // FIXME: Custom char classes don't match decomposed characters
-    firstMatchTest(#"e[\u{301}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
-    firstMatchTest(#"e[\u{300}-\u{320}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
-    firstMatchTest(#"[a-z][\u{300}-\u{320}]$"#, input: eDecomposed, match: eDecomposed,
-              xfail: true)
+    // A range that covers É (U+C9). Inputs are mapped to NFC, so match.
+    for input in [eComposedUpper, eDecomposedUpper] {
+      firstMatchTest(#"[\u{C8}-\u{CA}]"#, input: input, match: input)
+      firstMatchTest(#"[\u{C9}-\u{C9}]"#, input: input, match: input)
+    }
+    // Case insensitive matching of É (U+C9).
+    for input in [eComposed, eDecomposed, eComposedUpper, eDecomposedUpper] {
+      firstMatchTest(#"(?i)[\u{C8}-\u{CA}]"#, input: input, match: input)
+      firstMatchTest(#"(?i)[\u{C9}-\u{C9}]"#, input: input, match: input)
+    }
 
     let flag = "🇰🇷"
     firstMatchTest(#"🇰🇷"#, input: flag, match: flag)
@@ -1760,27 +2355,33 @@ extension RegexTests {
     firstMatchTest(#"\u{1F1F0 1F1F7}"#, input: flag, match: flag)
 
     // First Unicode scalar followed by CCC of regional indicators
-    firstMatchTest(#"\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: flag,
-              xfail: true)
-
-    // FIXME: CCC of Regional Indicator doesn't match with both parts of a flag character
+    firstMatchTest(
+      #"^\u{1F1F0}[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
+    // A CCC of regional indicators followed by the second Unicode scalar
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
     // A CCC of regional indicators x 2
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]{2}"#, input: flag, match: flag,
-              xfail: true)
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]{2}$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
+    // A CCC of N regional indicators
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]+$"#, input: flag, match: flag,
+      semanticLevel: .unicodeScalar
+    )
 
-    // FIXME: A single CCC of regional indicators matches the whole flag character
-    // A CCC of regional indicators followed by the second Unicode scalar
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]\u{1F1F7}"#, input: flag, match: flag,
-              xfail: true)
     // A single CCC of regional indicators
-    firstMatchTest(#"[\u{1F1E6}-\u{1F1FF}]"#, input: flag, match: nil,
-              xfail: true)
-    
-    // A single CCC of actual flag emojis / combined regional indicators
-    firstMatchTest(#"[🇦🇫-🇿🇼]"#, input: flag, match: flag)
-    // This succeeds (correctly) because \u{1F1F0} is lexicographically
-    // within the CCC range
-    firstMatchTest(#"[🇦🇫-🇿🇼]"#, input: "\u{1F1F0}abc", match: "\u{1F1F0}")
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: nil)
+    firstMatchTest(
+      #"^[\u{1F1E6}-\u{1F1FF}]$"#, input: flag, match: nil,
+      semanticLevel: .unicodeScalar
+    )
   }
   
   func testAnyChar() throws {
@@ -1853,6 +2454,19 @@ extension RegexTests {
   
   // TODO: Add test for grapheme boundaries at start/end of match
 
+  // Tests the matchScalar optimization for ASCII quoted literals and characters.
+  func testScalarOptimization() throws {
+    // Boundary check after matchScalar: "a" must not match inside "a\u{301}".
+    firstMatchTest("a", input: "a\u{301}", match: nil)
+    firstMatchTest("aa", input: "aa\u{301}", match: nil)
+    // With scalar semantics the same inputs match up to the accent.
+    firstMatchTest("a", input: "a\u{301}", match: "a", semanticLevel: .unicodeScalar)
+    firstMatchTest("aa", input: "aa\u{301}", match: "aa", semanticLevel: .unicodeScalar)
+
+    // Case-insensitive match of a pattern containing a combining scalar.
+    firstMatchTest(#"(?i)abc\u{301}d"#, input: "AbC\u{301}d", match: "AbC\u{301}d", semanticLevel: .unicodeScalar)
+  }
+  
   func testCase() {
     let regex = try! Regex(#".\N{SPARKLING HEART}."#)
     let input = "🧟‍♀️💖🧠 or 🧠💖☕️"
@@ -1893,5 +2507,31 @@ extension RegexTests {
       XCTAssertEqual(matches.count, 3)
     }
   }
-}
 
+  /// Matches `regex` against `target` on a separate task, waiting up to 3 seconds for it to finish.
+  func expectCompletion(regex: String, in target: String) {
+    let finished = XCTestExpectation(description: "Run the given regex to completion")
+    Task {
+      defer { finished.fulfill() }
+      let compiled = try! Regex(regex)
+      return target.matches(of: compiled).isEmpty
+    }
+    wait(for: [finished], timeout: 3.0)
+  }
+
+  /// Quantified sub-expressions that can succeed without consuming input must still terminate.
+  func testQuantificationForwardProgress() {
+    let patterns = [
+      // Quantified bodies that never consume input.
+      #"(?:(?=a)){1,}"#, #"(?:\b)+"#, #"(?:(?#comment))+"#, #"(?:|)+"#,
+      // Alternations in which one branch is empty.
+      #"(?:\w|)+"#, #"(?:\w|(?i-i:))+"#, #"(?:\w|(?#comment))+"#,
+      #"(?:\w|(?#comment)(?i-i:))+"#, #"(?:\w|(?i))+"#,
+      // Nested quantifiers whose inner group may match empty.
+      #"(a*)*"#, #"(a?)*"#, #"(a{,4})*"#, #"((|)+)*"#,
+    ]
+    for pattern in patterns {
+      expectCompletion(regex: pattern, in: "aa")
+    }
+  }
+}
diff --git a/Tests/RegexTests/ParseTests.swift b/Tests/RegexTests/ParseTests.swift
index 3c43f27af..84ce361f3 100644
--- a/Tests/RegexTests/ParseTests.swift
+++ b/Tests/RegexTests/ParseTests.swift
@@ -359,14 +359,14 @@ extension RegexTests {
     parseTest(
       "(.)*(.*)",
       concat(
-        zeroOrMore(of: capture(atom(.any))),
-        capture(zeroOrMore(of: atom(.any)))),
+        zeroOrMore(of: capture(atom(.dot))),
+        capture(zeroOrMore(of: atom(.dot)))),
       captures: [.opt, .cap])
     parseTest(
       "((.))*((.)?)",
       concat(
-        zeroOrMore(of: capture(capture(atom(.any)))),
-        capture(zeroOrOne(of: capture(atom(.any))))),
+        zeroOrMore(of: capture(capture(atom(.dot)))),
+        capture(zeroOrOne(of: capture(atom(.dot))))),
       captures: [.opt, .opt, .cap, .opt])
     parseTest(
       #"abc\d"#,
@@ -374,10 +374,21 @@ extension RegexTests {
 
     // MARK: Allowed combining characters
 
-    parseTest("e\u{301}", "e\u{301}")
     parseTest("1\u{358}", "1\u{358}")
     parseTest(#"\ \#u{361}"#, " \u{361}")
 
+    parseTest("e\u{301}", "e\u{301}")
+    parseTest("[e\u{301}]", charClass("e\u{301}"))
+    parseTest("\u{E9}", "e\u{301}")
+    parseTest("[\u{E9}]", charClass("e\u{301}"))
+
+    parseTest(
+      "\\e\u{301}", "e\u{301}", throwsError: .invalidEscape("e\u{301}"))
+    parseTest(
+      "[\\e\u{301}]", charClass("e\u{301}"),
+      throwsError: .invalidEscape("e\u{301}")
+    )
+
     // MARK: Alternations
 
     parseTest(
@@ -479,7 +490,7 @@ extension RegexTests {
 
     parseTest(#"abc\d"#, concat("a", "b", "c", escaped(.decimalDigit)))
 
-    // FIXME: '\N' should be emitted through 'emitAny', not through the
+    // FIXME: '\N' should be emitted through 'emitDot', not through the
     // _CharacterClassModel model.
     parseTest(#"\N"#, escaped(.notNewline), unsupported: true)
 
@@ -2885,11 +2896,41 @@ extension RegexTests {
     diagnosticTest(#"[a-\Qbc\E]"#, .unsupported("range with quoted sequence"))
     diagnosticTest(#"[\Qbc\E-de]"#, .unsupported("range with quoted sequence"))
 
+    diagnosticTest(#"|([🇦🇫-🇿🇼])?"#, .invalidCharacterClassRangeOperand)
+    diagnosticTest(#"|([👨‍👩‍👦-👩‍👩‍👧‍👧])?"#, .invalidCharacterClassRangeOperand)
+
+    // Not single-scalar NFC.
+    diagnosticTest("[e\u{301}-e\u{302}]", .invalidCharacterClassRangeOperand)
+
+    // These scalar values expand under NFC.
+    let nfcExpandingScalars: [UInt32] = [
+      0x344, 0x958, 0x959, 0x95A, 0x95B, 0x95C, 0x95D, 0x95E, 0x95F, 0x9DC,
+      0x9DD, 0x9DF, 0xA33, 0xA36, 0xA59, 0xA5A, 0xA5B, 0xA5E, 0xB5C, 0xB5D,
+      0xF43, 0xF4D, 0xF52, 0xF57, 0xF5C, 0xF69, 0xF73, 0xF75, 0xF76, 0xF78,
+      0xF81, 0xF93, 0xF9D, 0xFA2, 0xFA7, 0xFAC, 0xFB9, 0x2ADC, 0xFB1D, 0xFB1F,
+      0xFB2A, 0xFB2B, 0xFB2C, 0xFB2D, 0xFB2E, 0xFB2F, 0xFB30, 0xFB31, 0xFB32,
+      0xFB33, 0xFB34, 0xFB35, 0xFB36, 0xFB38, 0xFB39, 0xFB3A, 0xFB3B, 0xFB3C,
+      0xFB3E, 0xFB40, 0xFB41, 0xFB43, 0xFB44, 0xFB46, 0xFB47, 0xFB48, 0xFB49,
+      0xFB4A, 0xFB4B, 0xFB4C, 0xFB4D, 0xFB4E, 0x1D15E, 0x1D15F, 0x1D160,
+      0x1D161, 0x1D162, 0x1D163, 0x1D164, 0x1D1BB, 0x1D1BC, 0x1D1BD, 0x1D1BE,
+      0x1D1BF, 0x1D1C0
+    ]
+    for scalar in nfcExpandingScalars {
+      let hex = String(scalar, radix: 16)
+      diagnosticTest(
+        #"[\u{\#(hex)}-\u{\#(hex)}]"#, .invalidCharacterClassRangeOperand)
+    }
+
+    // The NFC form of U+2126 is U+3A9.
+    diagnosticTest(#"[\u{2126}-\u{2126}]"#, .invalidCharacterClassRangeOperand)
+
     diagnosticTest(#"[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
     diagnosticTest(#"(?i)[_-A]"#, .invalidCharacterRange(from: "_", to: "A"))
     diagnosticTest(#"[c-b]"#, .invalidCharacterRange(from: "c", to: "b"))
     diagnosticTest(#"[\u{66}-\u{65}]"#, .invalidCharacterRange(from: "\u{66}", to: "\u{65}"))
 
+    diagnosticTest(#"[e\u{301}-e\u{302}]"#, .invalidCharacterRange(from: "\u{301}", to: "e"))
+
     diagnosticTest("(?x)[(?#)]", .expected("]"))
     diagnosticTest("(?x)[(?#abc)]", .expected("]"))
 
diff --git a/Tests/RegexTests/RenderDSLTests.swift b/Tests/RegexTests/RenderDSLTests.swift
index 97ba3e333..e925d255c 100644
--- a/Tests/RegexTests/RenderDSLTests.swift
+++ b/Tests/RegexTests/RenderDSLTests.swift
@@ -68,7 +68,38 @@ extension RenderDSLTests {
       }
       """)
   }
-  
+
+  func testDot() throws { // `.` is expected to render as the regex literal `/./` in the builder output.
+    try testConversion(#".+"#, #"""
+      Regex {
+        OneOrMore {
+          /./
+        }
+      }
+      """#)
+    try testConversion(#"a.c"#, #"""
+      Regex {
+        "a"
+        /./
+        "c"
+      }
+      """#)
+  }
+
+  func testAnchor() throws { // `^` / `$` are expected to render as `/^/` / `/$/`, the alternation as `ChoiceOf`.
+    try testConversion(#"^(?:a|b|c)$"#, #"""
+      Regex {
+        /^/
+        ChoiceOf {
+          "a"
+          "b"
+          "c"
+        }
+        /$/
+      }
+      """#)
+  }
+
   func testOptions() throws {
     try XCTExpectFailure("Options like '(?i)' aren't converted") {
       try testConversion(#"(?i)abc"#, """
@@ -117,4 +148,95 @@ extension RenderDSLTests {
       }
       """#)
   }
+
+  func testScalar() throws { // Expected builder rendering of `\u{...}` scalar escapes.
+    try testConversion(#"\u{B4}"#, #"""
+      Regex {
+        "\u{B4}"
+      }
+      """#)
+    try testConversion(#"\u{301}"#, #"""
+      Regex {
+        "\u{301}"
+      }
+      """#)
+    try testConversion(#"[\u{301}]"#, #"""
+      Regex {
+        One(.anyOf("\u{301}"))
+      }
+      """#)
+    try testConversion(#"[abc\u{301}]"#, #"""
+      Regex {
+        One(.anyOf("abc\u{301}"))
+      }
+      """#)
+    // A literal followed by a scalar escape is expected to be joined into one string literal.
+    try testConversion(#"a\u{301}"#, #"""
+      Regex {
+        "a\u{301}"
+      }
+      """#)
+    // Under (?x) the whitespace in the pattern does not appear in the expected output.
+    try testConversion(#"(?x) a \u{301}"#, #"""
+      Regex {
+        "a\u{301}"
+      }
+      """#)
+
+    try testConversion(#"(?x) [ a b c \u{301} ] "#, #"""
+      Regex {
+        One(.anyOf("abc\u{301}"))
+      }
+      """#)
+    // Emoji joined by U+200D escapes are expected to render as a single string literal.
+    try testConversion(#"👨\u{200D}👨\u{200D}👧\u{200D}👦"#, #"""
+      Regex {
+        "👨\u{200D}👨\u{200D}👧\u{200D}👦"
+      }
+      """#)
+    // A capture group in the middle of that sequence splits the output into two literals.
+    try testConversion(#"(👨\u{200D}👨)\u{200D}👧\u{200D}👦"#, #"""
+      Regex {
+        Capture {
+          "👨\u{200D}👨"
+        }
+        "\u{200D}👧\u{200D}👦"
+      }
+      """#)
+
+    // The structure of non-capturing groups is preserved (rendered as a nested `Regex { }`).
+    try testConversion(#"abcd(?:e\u{301}\d)"#, #"""
+      Regex {
+        "abcd"
+        Regex {
+          "e\u{301}"
+          One(.digit)
+        }
+      }
+      """#)
+    // A multi-scalar escape `\u{A B C}` is expected to expand to one escape per scalar.
+    try testConversion(#"\u{A B C}"#, #"""
+      Regex {
+        "\u{A}\u{B}\u{C}"
+      }
+      """#)
+
+    // TODO: We might want to consider preserving scalar sequences in the DSL,
+    // and allowing them to merge with other concatenations.
+    try testConversion(#"\u{A B C}\u{d}efg"#, #"""
+      Regex {
+        "\u{A}\u{B}\u{C}"
+        "\u{D}efg"
+      }
+      """#)
+
+    // FIXME: The DSL has no way of specifying that these two literals should
+    // not be joined together; should they be printed as a regex instead?
+    try testConversion(#"a(?:\u{301})"#, #"""
+      Regex {
+        "a"
+        "\u{301}"
+      }
+      """#)
+  }
 }
diff --git a/Tests/RegexTests/UTS18Tests.swift b/Tests/RegexTests/UTS18Tests.swift
index fa8a1729d..11479bfb6 100644
--- a/Tests/RegexTests/UTS18Tests.swift
+++ b/Tests/RegexTests/UTS18Tests.swift
@@ -21,6 +21,7 @@
 import XCTest
 @testable // for internal `matches(of:)`
 import _StringProcessing
+import TestSupport
 
 extension UnicodeScalar {
   var value4Digits: String {
@@ -222,7 +223,7 @@ extension UTS18Tests {
   // - Nonspacing marks are never divided from their base characters, and
   //   otherwise ignored in locating boundaries.
   func testSimpleWordBoundaries() {
-    let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.unicodeLevel1)
+    let simpleWordRegex = regex(#".+?\b"#).wordBoundaryKind(.simple)
     expectFirstMatch(input, simpleWordRegex, input[pos: ..<11])
     expectFirstMatch("don't", simpleWordRegex, "don")
     expectFirstMatch("Cafe\u{301}", simpleWordRegex, "Café")
@@ -316,6 +317,9 @@ extension UTS18Tests {
   // surrogate followed by a trailing surrogate shall be handled as a single
   // code point in matching.
   func testSupplementaryCodePoints() {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     XCTAssertTrue("👍".contains(regex(#"\u{1F44D}"#)))
     XCTAssertTrue("👍".contains(regex(#"[\u{1F440}-\u{1F44F}]"#)))
     XCTAssertTrue("👍👎".contains(regex(#"^[\u{1F440}-\u{1F44F}]+$"#)))
@@ -388,6 +392,9 @@ extension UTS18Tests {
   }
   
   func testCharacterClassesWithStrings() {
+    // Must have new stdlib for character class ranges.
+    guard ensureNewStdlib() else { return }
+
     let regex = regex(#"[a-z🧐🇧🇪🇧🇫🇧🇬]"#)
     XCTAssertEqual("🧐", "🧐".wholeMatch(of: regex)?.0)
     XCTAssertEqual("🇧🇫", "🇧🇫".wholeMatch(of: regex)?.0)