Skip to content

Commit

Permalink
make enums containing ByteBuffer fit in three words (#349)
Browse files Browse the repository at this point in the history
Motivation:

NIO's dynamic pipeline comes with one performance caveat: If your type
isn't specialised in NIOAny (only types in the NIO module can be) and
it's over 3 words (24 bytes on 64-bit platforms), you'll suffer an
allocation for every time you box it in NIOAny. Very unfortunately, both
ByteBuffer and FileRegion were exactly 3 words wide which means that any
enum containing those would be wider than 3 words. Worse, both
HTTP{Request,Response}Head contained types that were wider than 3 words.
The best solution to this problem is to shrink ByteBuffer and FileRegion
to just under 3 words and that's exactly what this PR is doing. That is
slightly tricky as ByteBuffer was already bit packed fairly well
(reader/writer indices & slice begin/end were stored as a UInt32). The
trick we employ now is to store the slice beginning in a UInt24 and the
file region reader index in a UInt56. That saves one byte for both
ByteBuffer and FileRegion with very moderate tradeoffs: The reader index
in a file now needs to be within 64 PiB (pebibytes) and a byte buffer
slice beginning must start within 16 MiB (mebibytes). Note: The
reader/writer indices as well as slice ends are _not_ affected and can
still be within 4 GiB. Clearly no one would care about the restrictions
for FileRegions in the real world but we might hit the ByteBuffer slice
beginning limit in which case the slice would be copied out. But
given that slices are mostly used to slice off headers in network
protocols, 16 MiB should be _plenty_.

Norman was kind enough to measure the perf differences:

master (before this PR):
```
$ wrk -c 256 -d 10s -t 4 -s ~/Downloads/pipeline-many.lua -H "X-Host: SomeValue" -H "Host: swiftnio.io" -H "ThereAreEvenMoreHeaders: AndMoreValues" http://localhost:8888
Running 10s test @ http://localhost:8888
  4 threads and 256 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency    93.02ms  158.13ms   1.97s    93.07%
    Req/Sec   112.79k    26.05k  258.75k    84.50%
  4501098 requests in 10.04s, 214.63MB read
  Socket errors: connect 0, read 111, write 0, timeout 89
Requests/sec: 448485.61
Transfer/sec:     21.39MB
```

after this PR:
```
$ wrk -c 256 -d 10s -t 4 -s ~/Downloads/pipeline-many.lua -H "X-Host: SomeValue" -H "Host: swiftnio.io" -H "ThereAreEvenMoreHeaders: AndMoreValues" http://localhost:8888
Running 10s test @ http://localhost:8888
  4 threads and 256 connections
  Thread Stats   Avg      Stdev     Max   +/- Stdev
    Latency   107.53ms  206.56ms   1.99s    90.55%
    Req/Sec   124.15k    26.56k  290.41k    89.25%
  4952904 requests in 10.03s, 236.17MB read
  Socket errors: connect 0, read 161, write 0, timeout 22
Requests/sec: 493852.65
Transfer/sec:     23.55MB
```

So we see a nice ~10% improvement in requests/sec (448,485 before vs. 493,852 after).

Modifications:

- shrank ByteBuffer by 1 byte, making it 23 bytes in total
- shrank FileRegion by 1 byte, making it 23 bytes in total
- added @_inlineable to NIOAny where appropriate to not suffer boxed existentials in different places
- added tests for the new edge cases

Result:

- more speed
- fewer allocations
  • Loading branch information
weissi authored Apr 24, 2018
1 parent 163827b commit db61cfe
Show file tree
Hide file tree
Showing 9 changed files with 386 additions and 11 deletions.
62 changes: 55 additions & 7 deletions Sources/NIO/ByteBuffer-core.swift
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,43 @@ let sysFree: @convention(c) (UnsafeMutableRawPointer?) -> Void = free
}
#endif

/// Two slices are equal when both their 24-bit begin and their upper bound match.
extension _ByteBufferSlice: Equatable {
    static func ==(_ lhs: _ByteBufferSlice, _ rhs: _ByteBufferSlice) -> Bool {
        guard lhs._begin == rhs._begin else {
            return false
        }
        return lhs.upperBound == rhs.upperBound
    }
}

/// The slice of a `ByteBuffer`. It differs from `Range<UInt32>` in that the lower bound is stored in only
/// 24 bits (the upper bound is still 32 bits wide). Before constructing one, you need to make sure the lower
/// bound actually fits within 24 bits, otherwise the behaviour is undefined.
@_versioned
struct _ByteBufferSlice {
// NOTE(review): the stored properties are presumably declared in this order to keep the struct tightly
// packed -- verify with `MemoryLayout<_ByteBufferSlice>.size` before reordering them.
/// The upper bound of the slice, stored as a full 32-bit `ByteBuffer.Index`.
@_versioned var upperBound: ByteBuffer.Index
/// The lower bound of the slice, stored as a 24-bit value.
@_versioned var _begin: _UInt24
/// The lower bound of the slice, widened from `_begin` to a `ByteBuffer.Index`.
@_versioned var lowerBound: ByteBuffer.Index {
return UInt32(self._begin)
}
/// The number of bytes covered by the slice, i.e. `upperBound - lowerBound`.
@_inlineable @_versioned var count: Int {
return Int(self.upperBound - self.lowerBound)
}
/// Creates an empty slice whose lower and upper bound are both 0.
init() {
self._begin = 0
self.upperBound = 0
}
/// The largest lower bound a slice can represent: `_UInt24.max` converted to a `ByteBuffer.Index`.
static var maxSupportedLowerBound: ByteBuffer.Index {
return ByteBuffer.Index(_UInt24.max)
}
}

extension _ByteBufferSlice {
    /// Creates a slice covering `range`.
    ///
    /// - parameters:
    ///     - range: The region to cover. Its lower bound is converted to a `_UInt24`, so it must fit in 24 bits.
    init(_ range: Range<UInt32>) {
        self.init()

        self.upperBound = range.upperBound
        self._begin = _UInt24(range.lowerBound)
    }
}

/// The preferred allocator for `ByteBuffer` values. The allocation strategy is opaque but is currently libc's
/// `malloc`, `realloc` and `free`.
///
Expand Down Expand Up @@ -163,15 +200,15 @@ public struct ByteBufferAllocator {
/// for doing so. In any case, if you use the `get` prefixed methods you are responsible for ensuring that you do not reach into uninitialized memory by taking the `readableBytes` and `readerIndex` into
/// account, and ensuring that you have previously written into the area covered by the `index` itself.
public struct ByteBuffer {
typealias Slice = Range<Index>
typealias Slice = _ByteBufferSlice
typealias Allocator = ByteBufferAllocator
typealias Index = UInt32
typealias Capacity = UInt32

@_versioned private(set) var _storage: _Storage
@_versioned private(set) var _readerIndex: Index = 0
@_versioned private(set) var _writerIndex: Index = 0
@_versioned private(set) var _slice: Slice
@_versioned private(set) var _storage: _Storage

// MARK: Internal _Storage for CoW
@_versioned final class _Storage {
Expand All @@ -184,7 +221,7 @@ public struct ByteBuffer {
self.bytes = bytesNoCopy
self.capacity = capacity
self.allocator = allocator
self.fullSlice = 0..<self.capacity
self.fullSlice = _ByteBufferSlice(0..<self.capacity)
}

deinit {
Expand All @@ -210,7 +247,7 @@ public struct ByteBuffer {
allocator: self.allocator)
}

public func reallocSlice(_ slice: Slice, capacity: Capacity) -> _Storage {
public func reallocSlice(_ slice: Range<ByteBuffer.Index>, capacity: Capacity) -> _Storage {
assert(slice.count <= capacity)
let new = self.allocateStorage(capacity: capacity)
self.allocator.memcpy(new.bytes, self.bytes.advanced(by: Int(slice.lowerBound)), slice.count)
Expand All @@ -223,7 +260,7 @@ public struct ByteBuffer {
ptr.bindMemory(to: UInt8.self, capacity: Int(capacity))
self.bytes = ptr
self.capacity = capacity
self.fullSlice = 0..<self.capacity
self.fullSlice = _ByteBufferSlice(0..<self.capacity)
}

private func deallocate() {
Expand Down Expand Up @@ -282,7 +319,7 @@ public struct ByteBuffer {
} while newCapacity < index || newCapacity - index < capacity

self._storage.reallocStorage(capacity: newCapacity)
self._slice = _slice.lowerBound..<_slice.lowerBound + newCapacity
self._slice = _ByteBufferSlice(_slice.lowerBound..<_slice.lowerBound + newCapacity)
}
}

Expand Down Expand Up @@ -486,8 +523,19 @@ public struct ByteBuffer {
}
let index = _toIndex(index)
let length = _toCapacity(length)
let sliceStartIndex = self._slice.lowerBound + index

guard sliceStartIndex <= ByteBuffer.Slice.maxSupportedLowerBound else {
// the slice's begin is past the maximum supported slice begin value (16 MiB) so the only option we have
// is copy the slice into a fresh buffer. The slice begin will then be at index 0.
var new = self
new._moveWriterIndex(to: sliceStartIndex + length)
new._moveReaderIndex(to: sliceStartIndex)
new._copyStorageAndRebase(capacity: length, resetIndices: true)
return new
}
var new = self
new._slice = self._slice.lowerBound + index ..< self._slice.lowerBound + index+length
new._slice = _ByteBufferSlice(sliceStartIndex ..< self._slice.lowerBound + index+length)
new.moveReaderIndex(to: 0)
new._moveWriterIndex(to: length)
return new
Expand Down
20 changes: 16 additions & 4 deletions Sources/NIO/FileRegion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,23 @@ public struct FileRegion {
/// The `FileHandle` that is used by this `FileRegion`.
public let fileHandle: FileHandle

private let _endIndex: UInt64
private var _readerIndex: _UInt56

/// The current reader index of this `FileRegion`
private(set) public var readerIndex: Int
private(set) public var readerIndex: Int {
get {
return Int(self._readerIndex)
}
set {
self._readerIndex = _UInt56(newValue)
}
}

/// The end index of this `FileRegion`.
public let endIndex: Int
public var endIndex: Int {
return Int(self._endIndex)
}

/// Create a new `FileRegion` from an open `FileHandle`.
///
Expand All @@ -44,8 +56,8 @@ public struct FileRegion {
precondition(readerIndex <= endIndex, "readerIndex(\(readerIndex) must be <= endIndex(\(endIndex).")

self.fileHandle = fileHandle
self.readerIndex = readerIndex
self.endIndex = endIndex
self._readerIndex = _UInt56(readerIndex)
self._endIndex = UInt64(endIndex)
}

/// The number of readable bytes within this FileRegion (taking the `readerIndex` and `endIndex` into account).
Expand Down
137 changes: 137 additions & 0 deletions Sources/NIO/IntegerTypes.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the SwiftNIO open source project
//
// Copyright (c) 2017-2018 Apple Inc. and the SwiftNIO project authors
// Licensed under Apache License v2.0
//
// See LICENSE.txt for license information
// See CONTRIBUTORS.txt for the list of SwiftNIO project authors
//
// SPDX-License-Identifier: Apache-2.0
//
//===----------------------------------------------------------------------===//

// MARK: _UInt24

/// A 24-bit unsigned integer value type, stored as a 16-bit part followed by an 8-bit part.
@_versioned
struct _UInt24: ExpressibleByIntegerLiteral {
    typealias IntegerLiteralType = UInt16

    // Keep the declaration order of the stored properties: `b12` first, then `b3`.
    /// Bits 0-15 of the value.
    @_versioned var b12: UInt16
    /// Bits 16-23 of the value.
    @_versioned var b3: UInt8

    private init(b12: UInt16, b3: UInt8) {
        self.b3 = b3
        self.b12 = b12
    }

    /// Creates a value from an integer literal; literals are limited to the `UInt16` range.
    init(integerLiteral value: UInt16) {
        self.init(b12: value, b3: 0)
    }

    /// The number of bits this type represents.
    static let bitWidth: Int = 24

    /// The largest representable value (all 24 bits set).
    static var max: _UInt24 {
        return _UInt24(b12: UInt16.max, b3: UInt8.max)
    }

    /// The smallest representable value (zero).
    static let min: _UInt24 = _UInt24(integerLiteral: 0)
}

extension UInt32 {
    /// Widens a `_UInt24` to a `UInt32`: `b12` supplies bits 0-15 and `b3` bits 16-23.
    init(_ value: _UInt24) {
        let low = UInt32(value.b12)
        let high = UInt32(value.b3) << 16
        self = low | high
    }
}

extension Int {
    /// Widens a `_UInt24` to an `Int`: `b12` supplies bits 0-15 and `b3` bits 16-23.
    init(_ value: _UInt24) {
        let low = Int(value.b12)
        let high = Int(value.b3) << 16
        self = low | high
    }
}

extension _UInt24 {
    /// Creates a `_UInt24` from a `UInt32`.
    ///
    /// - parameters:
    ///     - value: The value to store. Its top 8 bits must be zero; this is checked with an `assert`.
    init(_ value: UInt32) {
        assert((value & 0xff_00_00_00) == 0, "value \(value) too large for _UInt24")
        let low = UInt16(truncatingIfNeeded: value & 0xff_ff)
        let high = UInt8(value >> 16)
        self.b12 = low
        self.b3 = high
    }
}

/// Two `_UInt24` values are equal when both of their stored parts are equal.
extension _UInt24: Equatable {
    static func ==(_ lhs: _UInt24, _ rhs: _UInt24) -> Bool {
        guard lhs.b12 == rhs.b12 else {
            return false
        }
        return lhs.b3 == rhs.b3
    }
}

// MARK: _UInt56

/// A 56-bit unsigned integer value type, stored as a 32-bit, a 16-bit and an 8-bit part.
struct _UInt56: ExpressibleByIntegerLiteral {
    typealias IntegerLiteralType = UInt32

    // Keep the declaration order of the stored properties: `b1234`, then `b56`, then `b7`.
    /// Bits 0-31 of the value.
    @_versioned var b1234: UInt32
    /// Bits 32-47 of the value.
    @_versioned var b56: UInt16
    /// Bits 48-55 of the value.
    @_versioned var b7: UInt8

    private init(b1234: UInt32, b56: UInt16, b7: UInt8) {
        self.b7 = b7
        self.b56 = b56
        self.b1234 = b1234
    }

    /// Creates a value from an integer literal; literals are limited to the `UInt32` range.
    init(integerLiteral value: UInt32) {
        self.init(b1234: value, b56: 0, b7: 0)
    }

    /// The number of bits this type represents.
    static let bitWidth: Int = 56

    /// The largest representable value (all 56 bits set).
    static var max: _UInt56 {
        return _UInt56(b1234: UInt32.max, b56: UInt16.max, b7: UInt8.max)
    }

    /// The smallest representable value (zero).
    static let min: _UInt56 = _UInt56(integerLiteral: 0)
}

extension _UInt56 {
    /// Creates a `_UInt56` from a `UInt64`.
    ///
    /// - parameters:
    ///     - value: The value to store. Its top 8 bits must be zero; this is checked with an `assert`.
    init(_ value: UInt64) {
        assert((value & 0xff_00_00_00_00_00_00_00) == 0, "value \(value) too large for _UInt56")
        let low = UInt32(truncatingIfNeeded: value & 0xff_ff_ff_ff)
        let middle = UInt16(truncatingIfNeeded: (value & 0xff_ff_00_00_00_00) >> 32)
        let high = UInt8(value >> 48)
        self.init(b1234: low, b56: middle, b7: high)
    }

    /// Creates a `_UInt56` from an `Int` by first converting it to a `UInt64`.
    ///
    /// - parameters:
    ///     - value: The value to store; it has to be representable as a `UInt64` (i.e. non-negative).
    init(_ value: Int) {
        let unsigned = UInt64(value)
        self.init(unsigned)
    }
}

extension UInt64 {
    /// Widens a `_UInt56` to a `UInt64`: `b1234` supplies bits 0-31, `b56` bits 32-47 and `b7` bits 48-55.
    init(_ value: _UInt56) {
        let low = UInt64(value.b1234)
        let middle = UInt64(value.b56) << 32
        let high = UInt64(value.b7) << 48
        self = low | middle | high
    }
}

extension Int {
    /// Converts a `_UInt56` to an `Int` by going through its `UInt64` representation.
    init(_ value: _UInt56) {
        let wide = UInt64(value)
        self = Int(wide)
    }
}

/// Two `_UInt56` values are equal when all three of their stored parts are equal.
extension _UInt56: Equatable {
    static func ==(_ lhs: _UInt56, _ rhs: _UInt56) -> Bool {
        guard lhs.b1234 == rhs.b1234 else {
            return false
        }
        guard lhs.b56 == rhs.b56 else {
            return false
        }
        return lhs.b7 == rhs.b7
    }
}
3 changes: 3 additions & 0 deletions Sources/NIO/NIOAny.swift
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,18 @@ public struct NIOAny {
/// Wrap a value in a `NIOAny`. In most cases you should not create a `NIOAny` directly using this constructor.
/// The abstraction that accepts values of type `NIOAny` must also provide a mechanism to do the wrapping. An
/// example is a `ChannelInboundHandler` which provides `self.wrapInboundOut(aValueOfTypeInboundOut)`.
@_inlineable
public init<T>(_ value: T) {
self._storage = _NIOAny(value)
}

@_versioned
enum _NIOAny {
case ioData(IOData)
case bufferEnvelope(AddressedEnvelope<ByteBuffer>)
case other(Any)

@_inlineable @_versioned
init<T>(_ value: T) {
switch value {
case let value as ByteBuffer:
Expand Down
8 changes: 8 additions & 0 deletions Sources/NIO/TypeAssistedChannelHandler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ public protocol _EmittingChannelHandler {
associatedtype OutboundOut = Never

/// Wrap the provided `OutboundOut` that will be passed to the next `ChannelOutboundHandler` by calling `ChannelHandlerContext.write`.
@_inlineable
func wrapOutboundOut(_ value: OutboundOut) -> NIOAny
}

/// Default implementations for `_EmittingChannelHandler`.
extension _EmittingChannelHandler {
@_inlineable
public func wrapOutboundOut(_ value: OutboundOut) -> NIOAny {
return NIOAny(value)
}
Expand All @@ -41,18 +43,22 @@ public protocol ChannelInboundHandler: _ChannelInboundHandler, _EmittingChannelH
associatedtype InboundOut = Never

/// Unwrap the provided `NIOAny` that was passed to `channelRead`.
@_inlineable
func unwrapInboundIn(_ value: NIOAny) -> InboundIn

/// Wrap the provided `InboundOut` that will be passed to the next `ChannelInboundHandler` by calling `ChannelHandlerContext.fireChannelRead`.
@_inlineable
func wrapInboundOut(_ value: InboundOut) -> NIOAny
}

/// Default implementations for `ChannelInboundHandler`.
extension ChannelInboundHandler {
@_inlineable
public func unwrapInboundIn(_ value: NIOAny) -> InboundIn {
return value.forceAs()
}

@_inlineable
public func wrapInboundOut(_ value: InboundOut) -> NIOAny {
return NIOAny(value)
}
Expand All @@ -66,11 +72,13 @@ public protocol ChannelOutboundHandler: _ChannelOutboundHandler, _EmittingChanne
associatedtype OutboundIn

/// Unwrap the provided `NIOAny` that was passed to `write`.
@_inlineable
func unwrapOutboundIn(_ value: NIOAny) -> OutboundIn
}

/// Default implementations for `ChannelOutboundHandler`.
extension ChannelOutboundHandler {
@_inlineable
public func unwrapOutboundIn(_ value: NIOAny) -> OutboundIn {
return value.forceAs()
}
Expand Down
3 changes: 3 additions & 0 deletions Tests/NIOTests/ByteBufferTest+XCTest.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ extension ByteBufferTest {
("testUnderestimatingSequenceWorks", testUnderestimatingSequenceWorks),
("testZeroSizeByteBufferResizes", testZeroSizeByteBufferResizes),
("testSpecifyTypesAndEndiannessForIntegerMethods", testSpecifyTypesAndEndiannessForIntegerMethods),
("testByteBufferFitsInACoupleOfEnums", testByteBufferFitsInACoupleOfEnums),
("testLargeSliceBegin16MBIsOkayAndDoesNotCopy", testLargeSliceBegin16MBIsOkayAndDoesNotCopy),
("testLargeSliceBeginMoreThan16MBIsOkay", testLargeSliceBeginMoreThan16MBIsOkay),
]
}
}
Expand Down
Loading

0 comments on commit db61cfe

Please sign in to comment.