diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift index b0f20ee06c2e4..4fc1b8b9fc71c 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArray.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift @@ -78,41 +78,41 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder { _ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder { switch arrowType.id { case .int8: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .int16: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .int32: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .int64: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .uint8: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .uint16: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .uint32: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .uint64: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .double: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .float: - return ArrowArrayHolderImpl(FixedArray(with)) + return try ArrowArrayHolderImpl(FixedArray(with)) case .date32: - return ArrowArrayHolderImpl(Date32Array(with)) + return try ArrowArrayHolderImpl(Date32Array(with)) case .date64: - return ArrowArrayHolderImpl(Date64Array(with)) + return try ArrowArrayHolderImpl(Date64Array(with)) case .time32: - return ArrowArrayHolderImpl(Time32Array(with)) + return try ArrowArrayHolderImpl(Time32Array(with)) case .time64: - return ArrowArrayHolderImpl(Time64Array(with)) + return try ArrowArrayHolderImpl(Time64Array(with)) case .string: - return ArrowArrayHolderImpl(StringArray(with)) + return try ArrowArrayHolderImpl(StringArray(with)) case .boolean: - return ArrowArrayHolderImpl(BoolArray(with)) + return try ArrowArrayHolderImpl(BoolArray(with)) case .binary: - return ArrowArrayHolderImpl(BinaryArray(with)) + return try ArrowArrayHolderImpl(BinaryArray(with)) case .strct: - return ArrowArrayHolderImpl(StructArray(with)) + return try ArrowArrayHolderImpl(StructArray(with)) default: throw ArrowError.invalid("Array not found for type: \(arrowType)") } @@ -125,7 +125,7 @@ public class ArrowArray: AsString, AnyArray { public var nullCount: UInt {return self.arrowData.nullCount} public var length: UInt {return self.arrowData.length} - public required init(_ arrowData: ArrowData) { + public required init(_ arrowData: ArrowData) throws { self.arrowData = arrowData } @@ -277,18 +277,14 @@ public class BinaryArray: ArrowArray { public class StructArray: ArrowArray<[Any?]> { public private(set) var arrowFields: [ArrowArrayHolder]? - public required init(_ arrowData: ArrowData) { - super.init(arrowData) - } - - public func initialize() throws -> StructArray { + public required init(_ arrowData: ArrowData) throws { + try super.init(arrowData) var fields = [ArrowArrayHolder]() for child in arrowData.children { fields.append(try ArrowArrayHolderImpl.loadArray(child.type, with: child)) } self.arrowFields = fields - return self } public override subscript(_ index: UInt) -> [Any?]? { diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index dc80f52f8ebd2..005cad79daeda 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -58,7 +58,8 @@ public class ArrowArrayBuilder> public func finish() throws -> ArrowArray { let buffers = self.bufferBuilder.finish() let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount) - return U(arrowData) + let array = try U(arrowData) + return array } public func getStride() -> Int { @@ -118,6 +119,55 @@ public class Time64ArrayBuilder: ArrowArrayBuilder, T } } +public class StructArrayBuilder: ArrowArrayBuilder { + let builders: [any ArrowArrayHolderBuilder] + let fields: [ArrowField] + public init(_ fields: [ArrowField], builders: [any ArrowArrayHolderBuilder]) throws { + self.fields = fields + self.builders = builders + try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) + self.bufferBuilder.initializeTypeInfo(fields) + } + + public init(_ fields: [ArrowField]) throws { + self.fields = fields + var builders = [any ArrowArrayHolderBuilder]() + for field in fields { + builders.append(try ArrowArrayBuilders.loadBuilder(arrowType: field.type)) + } + + self.builders = builders + try super.init(ArrowNestedType(ArrowType.ArrowStruct, fields: fields)) + } + + public override func append(_ values: [Any?]?) { + self.bufferBuilder.append(values) + if let anyValues = values { + for index in 0.. StructArray { + let buffers = self.bufferBuilder.finish() + var childData = [ArrowData]() + for builder in self.builders { + childData.append(try builder.toHolder().array.arrowData) + } + + let arrowData = try ArrowData(self.type, buffers: buffers, + children: childData, nullCount: self.nullCount, + length: self.length) + let structArray = try StructArray(arrowData) + return structArray + } +} + public class ArrowArrayBuilders { public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity _ builderType: Any.Type) throws -> ArrowArrayHolderBuilder { @@ -168,6 +218,72 @@ public class ArrowArrayBuilders { type == Float.self || type == Date.self } + public static func loadStructArrayBuilderForType(_ obj: T) throws -> StructArrayBuilder { + let mirror = Mirror(reflecting: obj) + var builders = [ArrowArrayHolderBuilder]() + var fields = [ArrowField]() + for (property, value) in mirror.children { + guard let propertyName = property else { + continue + } + + let builderType = type(of: value) + let arrowType = ArrowType(ArrowType.infoForType(builderType)) + fields.append(ArrowField(propertyName, type: arrowType, isNullable: true)) + builders.append(try loadBuilder(arrowType: arrowType)) + } + + return try StructArrayBuilder(fields, builders: builders) + } + + public static func loadBuilder( // swiftlint:disable:this cyclomatic_complexity + arrowType: ArrowType) throws -> ArrowArrayHolderBuilder { + switch arrowType.id { + case .uint8: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint16: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint32: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .uint64: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int8: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int16: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int32: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .int64: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .double: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .float: + return try loadNumberArrayBuilder() as NumberArrayBuilder + case .string: + return try StringArrayBuilder() + case .boolean: + return try BoolArrayBuilder() + case .binary: + return try BinaryArrayBuilder() + case .date32: + return try Date32ArrayBuilder() + case .date64: + return try Date64ArrayBuilder() + case .time32: + guard let timeType = arrowType as? ArrowTypeTime32 else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try Time32ArrayBuilder(timeType.unit) + case .time64: + guard let timeType = arrowType as? ArrowTypeTime64 else { + throw ArrowError.invalid("Expected arrow type for \(arrowType.id) not found") + } + return try Time64ArrayBuilder(timeType.unit) + default: + throw ArrowError.unknownType("Builder not found for arrow type: \(arrowType.id)") + } + } + public static func loadNumberArrayBuilder() throws -> NumberArrayBuilder { let type = T.self if type == Int8.self { diff --git a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift index e4c8036c327d1..47f9c40354b1b 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBufferBuilder.swift @@ -30,18 +30,14 @@ public protocol ArrowBufferBuilder { func finish() -> [ArrowBuffer] } -public class BaseBufferBuilder { - var values: ArrowBuffer +public class BaseBufferBuilder { var nulls: ArrowBuffer - var stride: Int public var offset: UInt = 0 - public var capacity: UInt {return self.values.capacity} + public var capacity: UInt {return self.nulls.capacity} public var length: UInt = 0 public var nullCount: UInt = 0 - init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout.stride) { - self.stride = stride - self.values = values + init(_ nulls: ArrowBuffer) { self.nulls = nulls } @@ -61,7 +57,19 @@ public class BaseBufferBuilder { } } -public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class ValuesBufferBuilder: BaseBufferBuilder { + var values: ArrowBuffer + var stride: Int + public override var capacity: UInt {return self.values.capacity} + + init(values: ArrowBuffer, nulls: ArrowBuffer, stride: Int = MemoryLayout.stride) { + self.stride = stride + self.values = values + super.init(nulls) + } +} + +public class FixedBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = T private let defaultVal: ItemType public required init() throws { @@ -138,7 +146,7 @@ public class FixedBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { } } -public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class BoolBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = Bool public required init() throws { let values = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) @@ -190,7 +198,7 @@ public class BoolBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { } } -public class VariableBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { +public class VariableBufferBuilder: ValuesBufferBuilder, ArrowBufferBuilder { public typealias ItemType = T var offsets: ArrowBuffer let binaryStride = MemoryLayout.stride @@ -327,3 +335,47 @@ public class Date64BufferBuilder: AbstractWrapperBufferBuilder { } } } + +public final class StructBufferBuilder: BaseBufferBuilder, ArrowBufferBuilder { + public typealias ItemType = [Any?] + var info: ArrowNestedType? + public init() throws { + let nulls = ArrowBuffer.createBuffer(0, size: UInt(MemoryLayout.stride)) + super.init(nulls) + } + + public func initializeTypeInfo(_ fields: [ArrowField]) { + info = ArrowNestedType(ArrowType.ArrowStruct, fields: fields) + } + + public func append(_ newValue: [Any?]?) { + let index = UInt(self.length) + self.length += 1 + if length > self.nulls.length { + self.resize(length) + } + + if newValue != nil { + BitUtility.setBit(index + self.offset, buffer: self.nulls) + } else { + self.nullCount += 1 + BitUtility.clearBit(index + self.offset, buffer: self.nulls) + } + } + + public func resize(_ length: UInt) { + if length > self.nulls.length { + let resizeLength = resizeLength(self.nulls) + var nulls = ArrowBuffer.createBuffer(resizeLength/8 + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: self.nulls.capacity) + self.nulls = nulls + } + } + + public func finish() -> [ArrowBuffer] { + let length = self.length + var nulls = ArrowBuffer.createBuffer(length/8 + 1, size: UInt(MemoryLayout.size)) + ArrowBuffer.copyCurrent(self.nulls, to: &nulls, len: nulls.capacity) + return [nulls] + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift index c701653ecb2c9..22c0672b27eac 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -23,7 +23,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBinary) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(BinaryArray(arrowData))) + return .success(ArrowArrayHolderImpl(try BinaryArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -36,7 +36,7 @@ private func makeStringHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowString) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(StringArray(arrowData))) + return .success(ArrowArrayHolderImpl(try StringArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -51,11 +51,11 @@ private func makeDateHolder(_ field: ArrowField, do { if field.type.id == .date32 { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(Date32Array(arrowData))) + return .success(ArrowArrayHolderImpl(try Date32Array(arrowData))) } let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(Date64Array(arrowData))) + return .success(ArrowArrayHolderImpl(try Date64Array(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -71,7 +71,7 @@ private func makeTimeHolder(_ field: ArrowField, if field.type.id == .time32 { if let arrowType = field.type as? ArrowTypeTime32 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -79,7 +79,7 @@ private func makeTimeHolder(_ field: ArrowField, if let arrowType = field.type as? ArrowTypeTime64 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -95,7 +95,7 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBool) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(BoolArray(arrowData))) + return .success(ArrowArrayHolderImpl(try BoolArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -109,7 +109,7 @@ private func makeFixedHolder( ) -> Result { do { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(try FixedArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift index ed0cb1148e871..bfd7492064352 100644 --- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -212,6 +212,67 @@ final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length XCTAssertEqual(microArray[2], 987654321) } + func testStructArray() throws { // swiftlint:disable:this function_body_length + class StructTest { + var fieldBool: Bool = false + var fieldInt8: Int8 = 0 + var fieldInt16: Int16 = 0 + var fieldInt32: Int32 = 0 + var fieldInt64: Int64 = 0 + var fieldUInt8: UInt8 = 0 + var fieldUInt16: UInt16 = 0 + var fieldUInt32: UInt32 = 0 + var fieldUInt64: UInt64 = 0 + var fieldDouble: Double = 0 + var fieldFloat: Float = 0 + var fieldString: String = "" + var fieldData = Data() + var fieldDate: Date = Date.now + } + + enum STIndex: Int { + case bool, int8, int16, int32, int64 + case uint8, uint16, uint32, uint64, double + case float, string, data, date + } + + let testData = StructTest() + let dateNow = Date.now + let structBuilder = try ArrowArrayBuilders.loadStructArrayBuilderForType(testData) + structBuilder.append([true, Int8(1), Int16(2), Int32(3), Int64(4), + UInt8(5), UInt16(6), UInt32(7), UInt64(8), Double(9.9), + Float(10.10), "11", Data("12".utf8), dateNow]) + structBuilder.append(nil) + structBuilder.append([true, Int8(13), Int16(14), Int32(15), Int64(16), + UInt8(17), UInt16(18), UInt32(19), UInt64(20), Double(21.21), + Float(22.22), "23", Data("24".utf8), dateNow]) + XCTAssertEqual(structBuilder.length, 3) + let structArray = try structBuilder.finish() + XCTAssertEqual(structArray.length, 3) + XCTAssertNil(structArray[1]) + XCTAssertEqual(structArray.arrowFields![0].length, 3) + XCTAssertNil(structArray.arrowFields![0].array.asAny(1)) + XCTAssertEqual(structArray[0]![STIndex.bool.rawValue] as? Bool, true) + XCTAssertEqual(structArray[0]![STIndex.int8.rawValue] as? Int8, 1) + XCTAssertEqual(structArray[0]![STIndex.int16.rawValue] as? Int16, 2) + XCTAssertEqual(structArray[0]![STIndex.int32.rawValue] as? Int32, 3) + XCTAssertEqual(structArray[0]![STIndex.int64.rawValue] as? Int64, 4) + XCTAssertEqual(structArray[0]![STIndex.uint8.rawValue] as? UInt8, 5) + XCTAssertEqual(structArray[0]![STIndex.uint16.rawValue] as? UInt16, 6) + XCTAssertEqual(structArray[0]![STIndex.uint32.rawValue] as? UInt32, 7) + XCTAssertEqual(structArray[0]![STIndex.uint64.rawValue] as? UInt64, 8) + XCTAssertEqual(structArray[0]![STIndex.double.rawValue] as? Double, 9.9) + XCTAssertEqual(structArray[0]![STIndex.float.rawValue] as? Float, 10.10) + XCTAssertEqual(structArray[2]![STIndex.string.rawValue] as? String, "23") + XCTAssertEqual( + String(decoding: (structArray[0]![STIndex.data.rawValue] as? Data)!, as: UTF8.self), "12") + let dateFormatter = DateFormatter() + dateFormatter.timeStyle = .full + XCTAssertTrue( + dateFormatter.string(from: (structArray[0]![STIndex.date.rawValue] as? Date)!) == + dateFormatter.string(from: dateNow)) + } + func checkHolderForType(_ checkType: ArrowType) throws { let buffers = [ArrowBuffer(length: 0, capacity: 0, rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)),