Skip to content

Commit

Permalink
apacheGH-43168: Add buffer and array builders for Struct type
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed Jul 11, 2024
1 parent 84df343 commit 53b09f4
Show file tree
Hide file tree
Showing 5 changed files with 269 additions and 44 deletions.
46 changes: 21 additions & 25 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -78,41 +78,41 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
_ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder {
switch arrowType.id {
case .int8:
return ArrowArrayHolderImpl(FixedArray<Int8>(with))
return try ArrowArrayHolderImpl(FixedArray<Int8>(with))
case .int16:
return ArrowArrayHolderImpl(FixedArray<Int16>(with))
return try ArrowArrayHolderImpl(FixedArray<Int16>(with))
case .int32:
return ArrowArrayHolderImpl(FixedArray<Int32>(with))
return try ArrowArrayHolderImpl(FixedArray<Int32>(with))
case .int64:
return ArrowArrayHolderImpl(FixedArray<Int64>(with))
return try ArrowArrayHolderImpl(FixedArray<Int64>(with))
case .uint8:
return ArrowArrayHolderImpl(FixedArray<UInt8>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt8>(with))
case .uint16:
return ArrowArrayHolderImpl(FixedArray<UInt16>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt16>(with))
case .uint32:
return ArrowArrayHolderImpl(FixedArray<UInt32>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt32>(with))
case .uint64:
return ArrowArrayHolderImpl(FixedArray<UInt64>(with))
return try ArrowArrayHolderImpl(FixedArray<UInt64>(with))
case .double:
return ArrowArrayHolderImpl(FixedArray<Double>(with))
return try ArrowArrayHolderImpl(FixedArray<Double>(with))
case .float:
return ArrowArrayHolderImpl(FixedArray<Float>(with))
return try ArrowArrayHolderImpl(FixedArray<Float>(with))
case .date32:
return ArrowArrayHolderImpl(Date32Array(with))
return try ArrowArrayHolderImpl(Date32Array(with))
case .date64:
return ArrowArrayHolderImpl(Date64Array(with))
return try ArrowArrayHolderImpl(Date64Array(with))
case .time32:
return ArrowArrayHolderImpl(Time32Array(with))
return try ArrowArrayHolderImpl(Time32Array(with))
case .time64:
return ArrowArrayHolderImpl(Time64Array(with))
return try ArrowArrayHolderImpl(Time64Array(with))
case .string:
return ArrowArrayHolderImpl(StringArray(with))
return try ArrowArrayHolderImpl(StringArray(with))
case .boolean:
return ArrowArrayHolderImpl(BoolArray(with))
return try ArrowArrayHolderImpl(BoolArray(with))
case .binary:
return ArrowArrayHolderImpl(BinaryArray(with))
return try ArrowArrayHolderImpl(BinaryArray(with))
case .strct:
return ArrowArrayHolderImpl(StructArray(with))
return try ArrowArrayHolderImpl(StructArray(with))
default:
throw ArrowError.invalid("Array not found for type: \(arrowType)")
}
Expand All @@ -125,7 +125,7 @@ public class ArrowArray<T>: AsString, AnyArray {
public var nullCount: UInt {return self.arrowData.nullCount}
public var length: UInt {return self.arrowData.length}

public required init(_ arrowData: ArrowData) {
public required init(_ arrowData: ArrowData) throws {
self.arrowData = arrowData
}

Expand Down Expand Up @@ -277,18 +277,14 @@ public class BinaryArray: ArrowArray<Data> {

public class StructArray: ArrowArray<[Any?]> {
public private(set) var arrowFields: [ArrowArrayHolder]?
public required init(_ arrowData: ArrowData) {
super.init(arrowData)
}

public func initialize() throws -> StructArray {
public required init(_ arrowData: ArrowData) throws {
try super.init(arrowData)
var fields = [ArrowArrayHolder]()
for child in arrowData.children {
fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child))
}

self.arrowFields = fields
return self
}

public override subscript(_ index: UInt) -> [Any?]? {
Expand Down
118 changes: 117 additions & 1 deletion swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ public class ArrowArrayBuilder<T: ArrowBufferBuilder, U: ArrowArray<T.ItemType>>
/// Finalizes the buffer builder and wraps the resulting buffers in an array of type `U`.
///
/// - Returns: The array constructed from the finished buffers.
/// - Throws: Any error raised while creating the `ArrowData` or while constructing `U`.
public func finish() throws -> ArrowArray<T.ItemType> {
    let buffers = self.bufferBuilder.finish()
    let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount)
    // The array initializer is declared `throws`, so it must be called with `try`;
    // there is a single construction/return path.
    return try U(arrowData)
}

public func getStride() -> Int {
Expand Down Expand Up @@ -118,6 +119,55 @@ public class Time64ArrayBuilder: ArrowArrayBuilder<FixedBufferBuilder<Time64>, T
}
}

/// Builds a `StructArray` by pairing a struct-level buffer builder with one
/// child builder per field.
public class StructArrayBuilder: ArrowArrayBuilder<StructBufferBuilder, StructArray> {
    /// Child builders; element `i` of every appended row is forwarded to `builders[i]`.
    let builders: [any ArrowArrayHolderBuilder]
    /// Field definitions used to create the struct's nested type.
    let fields: [ArrowField]

    /// Creates a struct builder from field definitions and caller-supplied child builders.
    ///
    /// - Parameters:
    ///   - fields: The fields of the struct.
    ///   - builders: The child builders, indexed in the same order as the row values
    ///     passed to `append`.
    /// - Throws: Any error raised by the superclass initializer.
    public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws {
        self.fields = fields
        self.builders = builders
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Creates a struct builder, loading a child builder for each field's type.
    ///
    /// - Parameter fields: The fields of the struct.
    /// - Throws: Any error raised by `ArrowArrayBuilders.loadBuilder(arrowType:)` or
    ///   by the superclass initializer.
    public init(_ fields: [ArrowField]) throws {
        self.fields = fields
        self.builders = try fields.map { try ArrowArrayBuilders.loadBuilder(arrowType: $0.type) }
        try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields))
        // Keep both initializers consistent: the buffer builder gets its type info
        // regardless of which one was used.
        self.bufferBuilder.initializeTypeInfo(fields)
    }

    /// Appends one struct row.
    ///
    /// A `nil` row is recorded in the struct-level buffer builder and `nil` is appended
    /// to every child builder; otherwise element `i` of `values` goes to `builders[i]`.
    ///
    /// - Parameter values: The row's field values in builder order, or `nil`.
    /// - Precondition: A non-nil `values` must contain at least `builders.count` elements.
    public override func append(_ values: [Any?]?) {
        if let anyValues = values {
            // Fail with a clear message before touching any builder state, instead of
            // an index-out-of-range trap part-way through the child builders.
            precondition(anyValues.count >= builders.count,
                         "StructArrayBuilder.append: expected \(builders.count) values, got \(anyValues.count)")
        }

        self.bufferBuilder.append(values)
        for index in builders.indices {
            self.builders[index].appendAny(values.flatMap { $0[index] })
        }
    }

    /// Finishes the struct-level buffers and every child builder, then assembles
    /// the resulting `StructArray`.
    ///
    /// - Returns: The struct array whose children are the finished child arrays.
    /// - Throws: Any error raised while finishing a child builder, creating the
    ///   `ArrowData`, or constructing the `StructArray`.
    public override func finish() throws -> StructArray {
        let buffers = self.bufferBuilder.finish()
        let childData = try self.builders.map { try $0.toHolder().array.arrowData }
        let arrowData = try ArrowData(self.type, buffers: buffers,
                                      children: childData, nullCount: self.nullCount,
                                      length: self.length)
        return try StructArray(arrowData)
    }
}

public class ArrowArrayBuilders {
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
_ builderType: Any.Type) throws -> ArrowArrayHolderBuilder {
Expand Down Expand Up @@ -168,6 +218,72 @@ public class ArrowArrayBuilders {
type == Float.self || type == Date.self
}

/// Creates a `StructArrayBuilder` from the labeled children that `Mirror` reports for `obj`.
///
/// Every labeled child becomes a nullable field named after its label, typed from the
/// runtime type of its value; children without a label are skipped.
///
/// - Parameter obj: The value to reflect over.
/// - Returns: A struct builder with one field and one child builder per labeled child.
/// - Throws: Any error raised while loading a child builder or creating the struct builder.
public static func loadStructArrayBuilderForType<T>(_ obj: T) throws -> StructArrayBuilder {
    var structFields = [ArrowField]()
    var childBuilders = [ArrowArrayHolderBuilder]()
    for child in Mirror(reflecting: obj).children {
        if let label = child.label {
            let fieldType = ArrowType(ArrowType.infoForType(type(of: child.value)))
            structFields.append(ArrowField(label, type: fieldType, isNullable: true))
            childBuilders.append(try loadBuilder(arrowType: fieldType))
        }
    }

    return try StructArrayBuilder(structFields, builders: childBuilders)
}

/// Returns a builder for the given Arrow type, selected by the type's id.
///
/// - Parameter arrowType: The Arrow type to build. For the time32/time64 ids the value
///   must be an `ArrowTypeTime32`/`ArrowTypeTime64` so that its `unit` can be read.
/// - Returns: The builder matching `arrowType.id`.
/// - Throws: `ArrowError.invalid` when a time id is not backed by the expected time type,
///   `ArrowError.unknownType` when no builder is listed for the id, or any error raised
///   by the selected builder's initializer.
public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity
    arrowType: ArrowType) throws -> ArrowArrayHolderBuilder {
    switch arrowType.id {
    // Signed integers.
    case .int8:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int8>
    case .int16:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int16>
    case .int32:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int32>
    case .int64:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Int64>
    // Unsigned integers.
    case .uint8:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt8>
    case .uint16:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt16>
    case .uint32:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt32>
    case .uint64:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<UInt64>
    // Floating point.
    case .float:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Float>
    case .double:
        return try loadNumberArrayBuilder() as NumberArrayBuilder<Double>
    // Boolean, text and raw bytes.
    case .boolean:
        return try BoolArrayBuilder()
    case .string:
        return try StringArrayBuilder()
    case .binary:
        return try BinaryArrayBuilder()
    // Dates.
    case .date32:
        return try Date32ArrayBuilder()
    case .date64:
        return try Date64ArrayBuilder()
    // Times: the unit comes from the concrete time type.
    case .time32:
        if let time32Type = arrowType as? ArrowTypeTime32 {
            return try Time32ArrayBuilder(time32Type.unit)
        }
        throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
    case .time64:
        if let time64Type = arrowType as? ArrowTypeTime64 {
            return try Time64ArrayBuilder(time64Type.unit)
        }
        throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found")
    default:
        throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)")
    }
}

public static func loadNumberArrayBuilder<T>() throws -> NumberArrayBuilder<T> {
let type = T.self
if type == Int8.self {
Expand Down
72 changes: 62 additions & 10 deletions swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder {
func finish() -> [ArrowBuffer]
}

public class BaseBufferBuilder<T> {
var values: ArrowBuffer
public class BaseBufferBuilder {
var nulls: ArrowBuffer
var stride: Int
public var offset: UInt = 0
public var capacity: UInt {return self.values.capacity}
public var capacity: UInt {return self.nulls.capacity}
public var length: UInt = 0
public var nullCount: UInt = 0

init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
self.stride = stride
self.values = values
init(_ nulls: ArrowBuffer) {
self.nulls = nulls
}

Expand All @@ -61,7 +57,19 @@ public class BaseBufferBuilder<T> {
}
}

public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
/// Buffer-builder base class for builders that keep a values buffer in addition
/// to the nulls buffer held by `BaseBufferBuilder`.
public class ValuesBufferBuilder<T>: BaseBufferBuilder {
    /// Buffer holding the appended values.
    var values: ArrowBuffer
    /// Stride used for the values; defaults to `MemoryLayout<T>.stride`.
    var stride: Int

    /// Capacity as reported by the values buffer.
    public override var capacity: UInt {
        return self.values.capacity
    }

    /// - Parameters:
    ///   - values: The buffer that stores values.
    ///   - nulls: The buffer handed to `BaseBufferBuilder`.
    ///   - stride: The stride to record for this builder.
    init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout<T>.stride) {
        self.values = values
        self.stride = stride
        super.init(nulls)
    }
}

public class FixedBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
private let defaultVal: ItemType
public required init() throws {
Expand Down Expand Up @@ -138,7 +146,7 @@ public class FixedBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
}
}

public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
public class BoolBufferBuilder: ValuesBufferBuilder<Bool>, ArrowBufferBuilder {
public typealias ItemType = Bool
public required init() throws {
let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
Expand Down Expand Up @@ -190,7 +198,7 @@ public class BoolBufferBuilder: BaseBufferBuilder<Bool>, ArrowBufferBuilder {
}
}

public class VariableBufferBuilder<T>: BaseBufferBuilder<T>, ArrowBufferBuilder {
public class VariableBufferBuilder<T>: ValuesBufferBuilder<T>, ArrowBufferBuilder {
public typealias ItemType = T
var offsets: ArrowBuffer
let binaryStride = MemoryLayout<UInt8>.stride
Expand Down Expand Up @@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder<Date, Int64> {
}
}
}

/// Buffer builder for struct arrays.
///
/// It maintains only the `nulls` bitmap (one bit per appended row); `finish()`
/// returns that single buffer.
public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder {
    public typealias ItemType = [Any?]
    /// Nested struct type, set by `initializeTypeInfo(_:)`; `nil` until then.
    var info: ArrowNestedType?

    /// Creates a builder with an initial, empty nulls buffer.
    public init() throws {
        let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout<UInt8>.stride))
        super.init(nulls)
    }

    /// Records the struct type described by `fields`.
    ///
    /// - Parameter fields: The fields of the struct being built.
    public func initializeTypeInfo(_ fields: [ArrowField]) {
        info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields)
    }

    /// Appends one row's validity: the bit is set for a non-nil row and cleared
    /// (and `nullCount` incremented) for a nil row.
    ///
    /// - Parameter newValue: The row being appended; only its nil-ness is recorded here.
    public func append(_ newValue: [Any?]?) {
        let bitIndex = UInt(self.length) + self.offset
        self.length += 1
        // Make sure the bitmap can hold bit `bitIndex` before writing it.
        self.resize(bitIndex + 1)

        if newValue != nil {
            BitUtility.setBit(bitIndex, buffer: self.nulls)
        } else {
            self.nullCount += 1
            BitUtility.clearBit(bitIndex, buffer: self.nulls)
        }
    }

    /// Grows the nulls bitmap so it can hold at least `length` bits.
    ///
    /// The check is made against the bitmap's byte capacity rather than against the
    /// buffer's recorded length: the bitmap is created with `bits / 8 + 1` entries, so
    /// comparing an item count with that entry count mixes units and does not yield a
    /// larger allocation as rows are added.
    ///
    /// - Parameter length: The number of bits (rows) the bitmap must be able to hold.
    public func resize(_ length: UInt) {
        // NOTE(review): assumes `ArrowBuffer.capacity` is a byte count, as its use for
        // the byte-wise copy length below suggests - confirm against ArrowBuffer.
        let currentBytes = self.nulls.capacity
        let requiredBytes = length / 8 + 1
        guard requiredBytes > currentBytes else {
            return
        }

        // Double on growth so repeated appends do not reallocate every time.
        let newByteCount = max(requiredBytes, currentBytes * 2)
        var nulls = ArrowBuffer.createBuffer(newByteCount, size: UInt(MemoryLayout<UInt8>.size))
        // Never copy more than either buffer holds.
        ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: min(currentBytes, nulls.capacity))
        self.nulls = nulls
    }

    /// Produces the final buffers for the struct array.
    ///
    /// - Returns: A one-element array containing a nulls bitmap sized for `length` rows.
    public func finish() -> [ArrowBuffer] {
        let length = self.length
        var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout<UInt8>.size))
        // Clamp the copy to the smaller of the two buffers so the source is never over-read.
        ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: min(self.nulls.capacity, nulls.capacity))
        return [nulls]
    }
}
Loading

0 comments on commit 53b09f4

Please sign in to comment.