diff --git a/stdlib/public/core/ContiguouslyStored.swift b/stdlib/public/core/ContiguouslyStored.swift index 55b625ec8701a..2cdf1c390158e 100644 --- a/stdlib/public/core/ContiguouslyStored.swift +++ b/stdlib/public/core/ContiguouslyStored.swift @@ -10,6 +10,10 @@ // //===----------------------------------------------------------------------===// +// NOTE: The below is necessary for fast String initialization from untyped +// memory. When we add Collection.withContiguousRawStorageIfAvailable(), we can +// deprecate this functionality. + @usableFromInline internal protocol _HasContiguousBytes { func withUnsafeBytes( diff --git a/stdlib/public/core/String.swift b/stdlib/public/core/String.swift index 3ac1c1b45f03e..95961c5abdbdb 100644 --- a/stdlib/public/core/String.swift +++ b/stdlib/public/core/String.swift @@ -388,6 +388,22 @@ extension String { } extension String { + // This force type-casts element to UInt8, since we cannot currently + // communicate to the type checker that we proved this with our dynamic + // check in String(decoding:as:). + @_alwaysEmitIntoClient + @inline(never) // slow-path + private static func _fromNonContiguousUnsafeBitcastUTF8Repairing< + C: Collection + >(_ input: C) -> (result: String, repairsMade: Bool) { + _internalInvariant(C.Element.self == UInt8.self) + return Array(input).withUnsafeBufferPointer { + let raw = UnsafeRawBufferPointer($0) + return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self)) + } + } + + /// Creates a string from the given Unicode code units in the specified /// encoding. /// @@ -407,8 +423,27 @@ extension String { return } + // Fast path for user-defined Collections and typed contiguous collections. + // + // Note: this comes first, as the optimizer nearly always has insight into + // wCSIA, but cannot prove that a type does not have conformance to + // _HasContiguousBytes. 
+ if let str = codeUnits.withContiguousStorageIfAvailable({ + (buffer: UnsafeBufferPointer) -> String in + Builtin.onFastPath() // encourage SIL Optimizer to inline this closure :-( + let rawBufPtr = UnsafeRawBufferPointer(buffer) + return String._fromUTF8Repairing( + UnsafeBufferPointer( + start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self), + count: rawBufPtr.count)).0 + }) { + self = str + return + } + + // Fast path for untyped raw storage and known stdlib types if let contigBytes = codeUnits as? _HasContiguousBytes, - contigBytes._providesContiguousBytesNoCopy + contigBytes._providesContiguousBytesNoCopy { self = contigBytes.withUnsafeBytes { rawBufPtr in return String._fromUTF8Repairing( @@ -419,15 +454,9 @@ extension String { return } - // Just copying to an Array is significantly faster than performing - // generic operations - self = Array(codeUnits).withUnsafeBufferPointer { - let raw = UnsafeRawBufferPointer($0) - return String._fromUTF8Repairing(raw.bindMemory(to: UInt8.self)).0 - } - return + self = String._fromNonContiguousUnsafeBitcastUTF8Repairing(codeUnits).0 } - + /// Creates a new string with the specified capacity in UTF-8 code units, and /// then calls the given closure with a buffer covering the string's /// uninitialized memory. 
@@ -484,7 +513,7 @@ extension String { initializingUTF8With: initializer ) } - + @inline(__always) internal init( _uninitializedCapacity capacity: Int, @@ -503,7 +532,7 @@ extension String { } return } - + self = try String._fromLargeUTF8Repairing( uninitializedCapacity: capacity, initializingWith: initializer) @@ -968,12 +997,12 @@ extension _StringGutsSlice { var outputBuffer = outputBuffer var icuInputBuffer = icuInputBuffer var icuOutputBuffer = icuOutputBuffer - + var index = range.lowerBound let cachedEndIndex = range.upperBound - + var hasBufferOwnership = false - + defer { if hasBufferOwnership { outputBuffer.deallocate() @@ -981,7 +1010,7 @@ extension _StringGutsSlice { icuOutputBuffer.deallocate() } } - + while index < cachedEndIndex { let result = _foreignNormalize( readIndex: index, @@ -1017,9 +1046,9 @@ internal func _fastWithNormalizedCodeUnitsImpl( var index = String.Index(_encodedOffset: 0) let cachedEndIndex = String.Index(_encodedOffset: sourceBuffer.count) - + var hasBufferOwnership = false - + defer { if hasBufferOwnership { outputBuffer.deallocate() @@ -1027,7 +1056,7 @@ internal func _fastWithNormalizedCodeUnitsImpl( icuOutputBuffer.deallocate() } } - + while index < cachedEndIndex { let result = _fastNormalize( readIndex: index, diff --git a/stdlib/public/core/StringCreate.swift b/stdlib/public/core/StringCreate.swift index f1d21d15daccc..d30fd0d52b67d 100644 --- a/stdlib/public/core/StringCreate.swift +++ b/stdlib/public/core/StringCreate.swift @@ -50,7 +50,7 @@ internal func _allASCII(_ input: UnsafeBufferPointer) -> Bool { } extension String { - + internal static func _uncheckedFromASCII( _ input: UnsafeBufferPointer ) -> String { @@ -61,7 +61,7 @@ extension String { let storage = __StringStorage.create(initializingFrom: input, isASCII: true) return storage.asString } - + @usableFromInline internal static func _fromASCII( _ input: UnsafeBufferPointer @@ -69,7 +69,7 @@ extension String { _internalInvariant(_allASCII(input), "not actually 
ASCII") return _uncheckedFromASCII(input) } - + internal static func _fromASCIIValidating( _ input: UnsafeBufferPointer ) -> String? { @@ -101,7 +101,7 @@ extension String { return (repairUTF8(input, firstKnownBrokenRange: initialRange), true) } } - + internal static func _fromLargeUTF8Repairing( uninitializedCapacity capacity: Int, initializingWith initializer: ( @@ -111,7 +111,7 @@ extension String { let result = try __StringStorage.create( uninitializedCodeUnitCapacity: capacity, initializingUncheckedUTF8With: initializer) - + switch validateUTF8(result.codeUnits) { case .success(let info): result._updateCountAndFlags( @@ -181,7 +181,7 @@ extension String { return contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) } } - + @inline(never) // slow path private static func _slowFromCodeUnits< Input: Collection, @@ -209,7 +209,7 @@ extension String { let str = contents.withUnsafeBufferPointer { String._uncheckedFromUTF8($0) } return (str, repaired) } - + @usableFromInline @inline(never) // can't be inlined w/out breaking ABI @_specialize( where Input == UnsafeBufferPointer, Encoding == Unicode.ASCII) @@ -227,27 +227,39 @@ extension String { guard _fastPath(encoding == Unicode.ASCII.self) else { return _slowFromCodeUnits(input, encoding: encoding, repair: repair) } - - var result:String? = nil - + + // Helper to simplify early returns + func resultOrSlow(_ resultOpt: String?) -> (String, repairsMade: Bool)? { + guard let result = resultOpt else { + return _slowFromCodeUnits(input, encoding: encoding, repair: repair) + } + return (result, repairsMade: false) + } + + // Fast path for untyped raw storage and known stdlib types if let contigBytes = input as? 
_HasContiguousBytes, contigBytes._providesContiguousBytesNoCopy { - result = contigBytes.withUnsafeBytes { rawBufPtr in + return resultOrSlow(contigBytes.withUnsafeBytes { rawBufPtr in let buffer = UnsafeBufferPointer( start: rawBufPtr.baseAddress?.assumingMemoryBound(to: UInt8.self), count: rawBufPtr.count) return String._fromASCIIValidating(buffer) - } - } else { - result = Array(input).withUnsafeBufferPointer { + }) + } + + // Fast path for user-defined Collections + if let strOpt = input.withContiguousStorageIfAvailable({ + (buffer: UnsafeBufferPointer) -> String? in + return String._fromASCIIValidating( + UnsafeRawBufferPointer(buffer).bindMemory(to: UInt8.self)) + }) { + return resultOrSlow(strOpt) + } + + return resultOrSlow(Array(input).withUnsafeBufferPointer { let buffer = UnsafeRawBufferPointer($0).bindMemory(to: UInt8.self) return String._fromASCIIValidating(buffer) - } - } - - return result != nil ? - (result!, repairsMade: false) : - _slowFromCodeUnits(input, encoding: encoding, repair: repair) + }) } public // @testable diff --git a/stdlib/public/core/Substring.swift b/stdlib/public/core/Substring.swift index fabcac52b96b6..b7050c93c0bf5 100644 --- a/stdlib/public/core/Substring.swift +++ b/stdlib/public/core/Substring.swift @@ -391,6 +391,7 @@ extension Substring.UTF8View: BidirectionalCollection { } @_alwaysEmitIntoClient + @inlinable public func withContiguousStorageIfAvailable( _ body: (UnsafeBufferPointer) throws -> R ) rethrows -> R? 
{ diff --git a/test/SILOptimizer/utf8_decoding_fastpath.swift b/test/SILOptimizer/utf8_decoding_fastpath.swift index b76bd2bc0212d..a77cd0cae3b87 100644 --- a/test/SILOptimizer/utf8_decoding_fastpath.swift +++ b/test/SILOptimizer/utf8_decoding_fastpath.swift @@ -1,4 +1,4 @@ -// RUN: %target-swift-frontend -Xllvm -swiftmergefunc-threshold=0 -parse-as-library -O -target-cpu core2 -emit-ir %s | %FileCheck %s +// RUN: %target-swift-frontend -O -emit-sil %s | %FileCheck %s // REQUIRES: optimized_stdlib,CPU=x86_64 // This is an end-to-end test to ensure that the optimizer generates @@ -12,101 +12,199 @@ func blackhole(_ value: T) {} // UnsafeBufferPointer -// ========================== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}s22utf8_decoding_fastpath15decodeUBPAsUTF8ySSSRys5UInt8VGF{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: ret +// +// CHECK-LABEL: sil {{.*}}decodeUBPAsUTF{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function {{.*}}decodeUBPAsUTF public func decodeUBPAsUTF8(_ ptr: UnsafeBufferPointer) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // UnsafeMutableBufferPointer -// ================================= -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}s22utf8_decoding_fastpath16decodeUMBPAsUTF8ySSSrys5UInt8VGF{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: ret +// +// CHECK-LABEL: sil {{.*}}decodeUMBPAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: 
function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeUMBPAsUTF8 public func decodeUMBPAsUTF8(_ ptr: UnsafeMutableBufferPointer) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // Array -// ============ -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeArrayAsUTF8{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: ret +// +// CHECK-LABEL: sil {{.*}}decodeArrayAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeArrayAsUTF8 public func decodeArrayAsUTF8(_ ptr: [UInt8]) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // UnsafeRawBufferPointer -// ====================== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeURBPAsUTF8{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: ret +// +// CHECK-LABEL: sil {{.*}}decodeURBPAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeURBPAsUTF8 public func decodeURBPAsUTF8(_ ptr: UnsafeRawBufferPointer) -> String { return String(decoding: ptr, as: 
Unicode.UTF8.self) } // UnsafeMutableRawBufferPointer -// ============================= -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeUMRBPAsUTF8{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: ret +// +// CHECK-LABEL: sil {{.*}}decodeUMRBPAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeUMRBPAsUTF8 public func decodeUMRBPAsUTF8(_ ptr: UnsafeMutableRawBufferPointer) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // String.UTF8View -// =============== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeStringUTF8ViewAs{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: br +// +// CHECK-LABEL: sil {{.*}}decodeStringUTF8ViewAs{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-DAG: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-DAG: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeStringUTF8ViewAs public func decodeStringUTF8ViewAsUTF8(_ ptr: String.UTF8View) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // Substring.UTF8View -// ================== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeSubstringUTF8ViewAs{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: br +// +// NOTE: withContiguousStorageIfAvailable is not currently inlined at the SIL +// 
level, so we have to disable the UTF8Repairing check :-( +// +// CHECK-LABEL: sil {{.*}}decodeSubstringUTF8ViewAs{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-DAG: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// xCHECK-DAG: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeSubstringUTF8ViewAs public func decodeSubstringUTF8ViewAsUTF8(_ ptr: Substring.UTF8View) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // Slice -// ========== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeUBPSliceAsUTF8{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK-LAST: br +// +// CHECK-LABEL: sil {{.*}}decodeUBPSliceAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeUBPSliceAsUTF8 public func decodeUBPSliceAsUTF8(_ ptr: Slice>) -> String { return String(decoding: ptr, as: Unicode.UTF8.self) } // Slice -// =========== -// CHECK-LABEL: define {{.*}}swiftcc {{.*}}decodeURBPSliceAsUTF8{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: {{.*}} = call swiftcc {{.*}} @"$sSS18_fromUTF8Repairing{{.*}} -// CHECK-NOT: _fromCodeUnits -// CHECK: ret +// +// CHECK-LABEL: sil {{.*}}decodeURBPSliceAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// 
CHECK-LABEL: end sil function{{.*}}decodeURBPSliceAsUTF8 public func decodeURBPSliceAsUTF8(_ ptr: Slice>) -> String { - blackhole("foo") // otherwise it just jumps into the Slice version return String(decoding: ptr, as: Unicode.UTF8.self) } + +public struct CustomContiguousCollection: Collection { + let storage: [UInt8] + public typealias Index = Int + public typealias Element = UInt8 + + public init(_ bytes: [UInt8]) { self.storage = bytes } + public subscript(position: Int) -> Element { self.storage[position] } + public var startIndex: Index { 0 } + public var endIndex: Index { storage.count } + public func index(after i: Index) -> Index { i+1 } + + @inline(__always) + public func withContiguousStorageIfAvailable( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R? { + try storage.withContiguousStorageIfAvailable(body) + } +} +public struct CustomNonContiguousCollection: Collection { + let storage: [UInt8] + public typealias Index = Int + public typealias Element = UInt8 + + public init(_ bytes: [UInt8]) { self.storage = bytes } + public subscript(position: Int) -> Element { self.storage[position] } + public var startIndex: Index { 0 } + public var endIndex: Index { storage.count } + public func index(after i: Index) -> Index { i+1 } + + @inline(__always) + public func withContiguousStorageIfAvailable( + _ body: (UnsafeBufferPointer) throws -> R + ) rethrows -> R? 
{ + nil + } +} + +// CustomContiguousCollection +// +// CHECK-LABEL: sil {{.*}}decodeCustomContiguousAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK: function_ref {{.*}}_fromUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-LABEL: end sil function{{.*}}decodeCustomContiguousAsUTF8 +public func decodeCustomContiguousAsUTF8(_ c: CustomContiguousCollection) -> String { + return String(decoding: c, as: UTF8.self) +} + +// CustomNonContiguousCollection +// +// CHECK-LABEL: sil {{.*}}decodeCustomNonContiguousAsUTF8{{.*}} : $@convention +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-NOT: function_ref {{.*}}_fromUTF8Repairing +// CHECK: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-NOT: function_ref {{.*}}_fromCodeUnits +// CHECK-NOT: function_ref {{.*}}_fromUTF8Repairing +// CHECK-LABEL: end sil function{{.*}}decodeCustomNonContiguousAsUTF8 +public func decodeCustomNonContiguousAsUTF8(_ c: CustomNonContiguousCollection) -> String { + return String(decoding: c, as: UTF8.self) +} + +// UTF-16 +// +// NOTE: The SIL optimizer cannot currently fold away a (UTF16.self == +// UTF8.self) metatype comparison, so we have to disable the check-not for UTF-8 +// construction :-( +// +// CHECK-LABEL: sil {{.*}}decodeUTF16{{.*}} : $@convention +// xCHECK-NOT: function_ref {{.*}}_fromUTF8Repairing +// xCHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK: function_ref {{.*}}_fromCodeUnits +// xCHECK-NOT: function_ref {{.*}}_fromUTF8Repairing +// xCHECK-NOT: function_ref {{.*}}_fromNonContiguousUnsafeBitcastUTF8Repairing +// CHECK-LABEL: end sil function{{.*}}decodeUTF16 +public func decodeUTF16(_ c: Array) -> String { + return String(decoding: c, as: UTF16.self) +}