Skip to content

Commit

Permalink
apacheGH-43160: Add Struct Array
Browse files Browse the repository at this point in the history
  • Loading branch information
abandy committed Jul 6, 2024
1 parent 8d5b289 commit d01a19b
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 20 deletions.
108 changes: 102 additions & 6 deletions swift/Arrow/Sources/Arrow/ArrowArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ public protocol ArrowArrayHolder {
var type: ArrowType {get}
var length: UInt {get}
var nullCount: UInt {get}
var array: Any {get}
var array: AnyArray {get}
var data: ArrowData {get}
var getBufferData: () -> [Data] {get}
var getBufferDataSizes: () -> [Int] {get}
var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get}
}

public class ArrowArrayHolderImpl: ArrowArrayHolder {
public let array: Any
public let data: ArrowData
public let type: ArrowType
public let length: UInt
public let nullCount: UInt
public let array: AnyArray
public let getBufferData: () -> [Data]
public let getBufferDataSizes: () -> [Int]
public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn
Expand Down Expand Up @@ -73,6 +73,50 @@ public class ArrowArrayHolderImpl: ArrowArrayHolder {
return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray<T>(arrays)))
}
}

/// Wraps `with` in the concrete array class that matches `arrowType.id` and
/// returns it behind a type-erased `ArrowArrayHolder`.
///
/// - Parameters:
///   - arrowType: The Arrow type whose `id` selects the concrete array class.
///   - with: The underlying Arrow data handed to that array's initializer.
/// - Returns: A holder wrapping the newly created array.
/// - Throws: `ArrowError.invalid` when no array class is registered for the
///   type id, or any error raised while loading the children of a struct.
public static func loadArray( // swiftlint:disable:this cyclomatic_complexity
    _ arrowType: ArrowType, with: ArrowData) throws -> ArrowArrayHolder {
    switch arrowType.id {
    case .int8:
        return ArrowArrayHolderImpl(FixedArray<Int8>(with))
    case .int16:
        return ArrowArrayHolderImpl(FixedArray<Int16>(with))
    case .int32:
        return ArrowArrayHolderImpl(FixedArray<Int32>(with))
    case .int64:
        return ArrowArrayHolderImpl(FixedArray<Int64>(with))
    case .uint8:
        return ArrowArrayHolderImpl(FixedArray<UInt8>(with))
    case .uint16:
        return ArrowArrayHolderImpl(FixedArray<UInt16>(with))
    case .uint32:
        return ArrowArrayHolderImpl(FixedArray<UInt32>(with))
    case .uint64:
        return ArrowArrayHolderImpl(FixedArray<UInt64>(with))
    case .double:
        return ArrowArrayHolderImpl(FixedArray<Double>(with))
    case .float:
        return ArrowArrayHolderImpl(FixedArray<Float>(with))
    case .date32:
        return ArrowArrayHolderImpl(Date32Array(with))
    case .date64:
        return ArrowArrayHolderImpl(Date64Array(with))
    case .time32:
        return ArrowArrayHolderImpl(Time32Array(with))
    case .time64:
        return ArrowArrayHolderImpl(Time64Array(with))
    case .string:
        return ArrowArrayHolderImpl(StringArray(with))
    case .boolean:
        return ArrowArrayHolderImpl(BoolArray(with))
    case .binary:
        return ArrowArrayHolderImpl(BinaryArray(with))
    case .strct:
        // A StructArray only knows its child arrays after initialize() has
        // run; without it `arrowFields` stays nil and every row reads as nil.
        return try ArrowArrayHolderImpl(StructArray(with).initialize())
    default:
        throw ArrowError.invalid("Array not found for type: \(arrowType)")
    }
}
}

public class ArrowArray<T>: AsString, AnyArray {
Expand Down Expand Up @@ -221,10 +265,7 @@ public class BinaryArray: ArrowArray<Data> {
}

public override func asString(_ index: UInt) -> String {
if self[index] == nil {
return ""
}

if self[index] == nil {return ""}
let data = self[index]!
if options.printAsHex {
return data.hexEncodedString()
Expand All @@ -233,3 +274,58 @@ public class BinaryArray: ArrowArray<Data> {
}
}
}

/// An Arrow array whose elements are rows made of one value per child array.
///
/// The child arrays are not loaded by `init`; call `initialize()` before
/// reading rows, otherwise `arrowFields` is `nil` and the subscript returns
/// `nil` for every index.
public class StructArray: ArrowArray<[Any?]> {
    /// Holders for the child arrays, in the order of `arrowData.children`.
    /// `nil` until `initialize()` has completed successfully.
    public private(set) var arrowFields: [ArrowArrayHolder]?

    public required init(_ arrowData: ArrowData) {
        super.init(arrowData)
    }

    /// Loads one array holder per child of `arrowData` and stores them in
    /// `arrowFields`.
    ///
    /// `arrowFields` is only replaced when every child loads successfully.
    ///
    /// - Returns: `self`, so the call can be chained after construction.
    /// - Throws: Whatever `ArrowArrayHolderImpl.loadArray` throws for a child.
    @discardableResult
    public func initialize() throws -> StructArray {
        arrowFields = try arrowData.children.map { child in
            try ArrowArrayHolderImpl.loadArray(child.type, with: child)
        }
        return self
    }

    /// Returns the row at `index` as one type-erased value per child array.
    ///
    /// - Returns: `nil` when the row is null or when `initialize()` has not
    ///   been called yet; otherwise the children's `asAny(index)` values.
    public override subscript(_ index: UInt) -> [Any?]? {
        guard !arrowData.isNull(index), let fields = arrowFields else {
            return nil
        }

        return fields.map { field -> Any? in field.array.asAny(index) }
    }

    /// Renders the row at `index` as `{v0,v1,...}`.
    ///
    /// - Returns: An empty string for a null row, `{}` when no child arrays
    ///   are loaded, otherwise the comma-separated child values in braces.
    public override func asString(_ index: UInt) -> String {
        if arrowData.isNull(index) {
            return ""
        }

        let rendered = (arrowFields ?? []).map { field -> String in
            if let printable = field.array as? AsString {
                return printable.asString(index)
            }

            // A child that does not conform to AsString used to crash on a
            // force-unwrap here; describe its type-erased value instead and
            // use an empty string when that value is nil.
            return field.array.asAny(index).map { "\($0)" } ?? ""
        }

        return "{" + rendered.joined(separator: ",") + "}"
    }
}
2 changes: 1 addition & 1 deletion swift/Arrow/Sources/Arrow/ArrowCImporter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class ImportArrayHolder: ArrowArrayHolder {
public var type: ArrowType {self.holder.type}
public var length: UInt {self.holder.length}
public var nullCount: UInt {self.holder.nullCount}
public var array: Any {self.holder.array}
public var array: AnyArray {self.holder.array}
public var data: ArrowData {self.holder.data}
public var getBufferData: () -> [Data] {self.holder.getBufferData}
public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes}
Expand Down
12 changes: 2 additions & 10 deletions swift/Arrow/Sources/Arrow/ArrowDecoder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -96,23 +96,15 @@ public class ArrowDecoder: Decoder {
throw ArrowError.invalid("Column for key \"\(name)\" not found")
}

guard let anyArray = col.array as? AnyArray else {
throw ArrowError.invalid("Unable to convert array to AnyArray")
}

return anyArray
return col.array
}

func getCol(_ index: Int) throws -> AnyArray {
if index >= self.columns.count {
throw ArrowError.outOfBounds(index: Int64(index))
}

guard let anyArray = self.columns[index].array as? AnyArray else {
throw ArrowError.invalid("Unable to convert array to AnyArray")
}

return anyArray
return self.columns[index].array
}

func doDecode<T>(_ key: CodingKey) throws -> T? {
Expand Down
2 changes: 1 addition & 1 deletion swift/Arrow/Sources/Arrow/ArrowTable.swift
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ public class RecordBatch {

public func anyData(for columnIndex: Int) -> AnyArray {
let arrayHolder = column(columnIndex)
return (arrayHolder.array as! AnyArray) // swiftlint:disable:this force_cast
return arrayHolder.array
}

public func column(_ index: Int) -> ArrowArrayHolder {
Expand Down
1 change: 1 addition & 0 deletions swift/Arrow/Sources/Arrow/ChunkedArray.swift
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import Foundation

/// Type-erased view of an Arrow array, usable without knowing its element type.
public protocol AnyArray {
    /// The array's length.
    var length: UInt {get}
    /// The `ArrowData` instance exposed by the array.
    var arrowData: ArrowData {get}
    /// Returns the element at `index` as `Any?`.
    func asAny(_ index: UInt) -> Any?
}
Expand Down
4 changes: 2 additions & 2 deletions swift/Arrow/Tests/ArrowTests/CodableTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_lengt
}

func getArrayValue<T>(_ rb: RecordBatch, colIndex: Int, rowIndex: UInt) -> T? {
let anyArray = rb.columns[colIndex].array as! AnyArray // swiftlint:disable:this force_cast
let anyArray = rb.columns[colIndex].array
return anyArray.asAny(UInt(rowIndex)) as? T
}

Expand Down Expand Up @@ -324,7 +324,7 @@ final class CodableTests: XCTestCase { // swiftlint:disable:this type_body_lengt
XCTAssertEqual(rb.columns[0].type.id, ArrowTypeId.int32)
for index in 0..<100 {
if index == 10 {
let anyArray = rb.columns[0].array as! AnyArray // swiftlint:disable:this force_cast
let anyArray = rb.columns[0].array
XCTAssertNil(anyArray.asAny(UInt(index)))
} else {
XCTAssertEqual(getArrayValue(rb, colIndex: 0, rowIndex: UInt(index)), Int32(index))
Expand Down

0 comments on commit d01a19b

Please sign in to comment.