@@ -291,7 +291,7 @@ extension Processor {
291291 _ bitset: DSLTree . CustomCharacterClass . AsciiBitset ,
292292 isScalarSemantics: Bool
293293 ) -> Bool {
294- guard let next = input. matchBitset (
294+ guard let next = input. matchASCIIBitset (
295295 bitset,
296296 at: currentPosition,
297297 limitedBy: end,
@@ -727,30 +727,59 @@ extension String {
727727 return idx
728728 }
729729
730- func matchBitset(
730+ // Returns the index following `pos` (scalar-wise under scalar semantics, otherwise character-wise), or nil when `returnNil` is set. TODO: effects? release none? consuming self?
731+ private func _getNextIndex(
732+ at pos: Index , isScalarSemantics: Bool , returnNil: Bool
733+ ) -> Index ? {
734+ assert ( pos < endIndex) // debug-only check: there must be an element at `pos` to step over
735+ if returnNil { return nil } // caller-requested short-circuit: no index is computed
736+ if isScalarSemantics {
737+ return self . unicodeScalars. index ( after: pos) // advance by one Unicode scalar
738+ }
739+ return self . index ( after: pos) // advance by one Character
740+ }
741+
742+ func matchASCIIBitset( // Returns a following index when the element at `pos` matches `bitset`, otherwise nil.
731743 _ bitset: DSLTree . CustomCharacterClass . AsciiBitset ,
732744 at pos: Index ,
733745 limitedBy end: Index ,
734746 isScalarSemantics: Bool
735747 ) -> Index ? {
736- // TODO: extremely quick-check-able
737- // TODO: can be sped up with string internals
738748 assert ( end <= endIndex)
739749
740750 guard pos < end else { return nil }
741751
742- let idx : String . Index
743- if isScalarSemantics {
744- guard bitset. matches ( unicodeScalars [ pos] ) else { return nil }
745- idx = unicodeScalars. index ( after: pos)
746- } else {
747- guard bitset. matches ( self [ pos] ) else { return nil }
748- idx = index ( after: pos)
752+ // TODO: Inversion should be tracked and handled in only one place.
753+ let isInverted = bitset. isInverted
754+
755+ // TODO: Want something more specialized, so overhaul/refactor _quickASCIICharacter
756+ guard let ( byte, next, isCRLF) = _quickASCIICharacter ( at: pos) else { // NOTE(review): nil presumably means no ASCII fast path applies at `pos` — confirm against _quickASCIICharacter
757+ // FIXME: what if following index is beyond end? (The replaced implementation rejected `idx > end`; this fallback performs no such check.)
758+ if isScalarSemantics {
759+ return bitset. matches ( self . unicodeScalars [ pos] ) ? self . unicodeScalars. index ( after: pos) : nil
760+ }
761+
762+ return bitset. matches ( self [ pos] ) ? self . index ( after: pos) : nil
749763 }
750764
751- guard idx <= end else { return nil }
752- return idx
753- }
765+ // TODO: refactor, this checks the inversion property for us
766+ guard bitset. matches ( byte) else {
767+ return nil
768+ }
754769
770+ // CR-LF should only match `[\r]` in scalar semantic mode or if inverted
771+ if isCRLF {
772+ // TODO: what if next is past `end` because CRLF?
773+ // FIXME: quickASCIICharacter probably needs a limitedBy argument
774+ if isScalarSemantics {
775+ return self . unicodeScalars. index ( before: next) // step back one scalar from `next`; presumably lands between the CR and the LF — confirm
776+ }
777+ if isInverted {
778+ return next
779+ }
780+ return nil
781+ }
755782
783+ return next
784+ }
756785}
0 commit comments