diff --git a/Sources/Arrow/ArrowArrayBuilder.swift b/Sources/Arrow/ArrowArrayBuilder.swift index e7ef607..22d9a65 100644 --- a/Sources/Arrow/ArrowArrayBuilder.swift +++ b/Sources/Arrow/ArrowArrayBuilder.swift @@ -19,6 +19,8 @@ import Foundation /// A type which builds a type-erased `ArrowArray`. public protocol AnyArrowArrayBuilder { + /// Returns an unparameterised `ArrowArray`. + /// - Returns: The type-erased Arrow array. func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray func appendAny(_ val: Any?) } @@ -73,8 +75,6 @@ extension ArrowArrayBuilderInternal { self.arrowType.getStride() } - /// Returns an unparameterised `ArrowArray`. - /// - Returns: The type-erased Arrow array. public func toAnyArrowArray() throws(ArrowError) -> AnyArrowArray { try self.finish() } @@ -109,7 +109,7 @@ public class ArrowArrayBuilderBase< } } -/// A type which builds an `ArrowArray` with a numeric `ItemType`. +/// An array builder for numeric types. public class NumberArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, FixedArray @@ -120,6 +120,7 @@ where ItemType: Numeric, ItemType: BitwiseCopyable { } } +/// A `String` array builder. public class StringArrayBuilder: ArrowArrayBuilderBase< VariableBufferBuilder, StringArray @@ -130,6 +131,7 @@ public class StringArrayBuilder: ArrowArrayBuilderBase< } } +/// A `Data` array builder. public class BinaryArrayBuilder: ArrowArrayBuilderBase< VariableBufferBuilder, BinaryArray @@ -140,6 +142,7 @@ public class BinaryArrayBuilder: ArrowArrayBuilderBase< } } +/// A `Bool` array builder. public class BoolArrayBuilder: ArrowArrayBuilderBase< BoolBufferBuilder, BoolArray > @@ -149,6 +152,7 @@ public class BoolArrayBuilder: ArrowArrayBuilderBase< } } +/// A 32-bit date array builder. public class Date32ArrayBuilder: ArrowArrayBuilderBase< Date32BufferBuilder, Date32Array @@ -159,6 +163,7 @@ public class Date32ArrayBuilder: ArrowArrayBuilderBase< } } +/// A 64-bit date array builder. 
public class Date64ArrayBuilder: ArrowArrayBuilderBase< Date64BufferBuilder, Date64Array @@ -169,6 +174,7 @@ public class Date64ArrayBuilder: ArrowArrayBuilderBase< } } +/// A 32-bit elapsed time builder. public class Time32ArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, Time32Array @@ -179,6 +185,7 @@ public class Time32ArrayBuilder: ArrowArrayBuilderBase< } } +/// A 64-bit elapsed time builder. public class Time64ArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, Time64Array @@ -189,6 +196,7 @@ public class Time64ArrayBuilder: ArrowArrayBuilderBase< } } +/// A Timestamp array builder. public class TimestampArrayBuilder: ArrowArrayBuilderBase< FixedBufferBuilder, TimestampArray @@ -203,6 +211,7 @@ public class TimestampArrayBuilder: ArrowArrayBuilderBase< // MARK: Struct array builder. +/// Builds an array of structs. public class StructArrayBuilder: ArrowArrayBuilderBase< StructBufferBuilder, NestedArray @@ -262,7 +271,9 @@ public class StructArrayBuilder: ArrowArrayBuilderBase< // MARK: List array builder. -/// A type which can build an `NestedArray`containing exactly `ItemType`. +/// Builds a `NestedArray` containing lists of `ItemType`. +/// +/// Both lists and items in lists are nullable. public class ListArrayBuilder: ArrowArrayBuilderBase< ListBufferBuilder, NestedArray @@ -271,18 +282,17 @@ public class ListArrayBuilder: ArrowArrayBuilderBase< let valueBuilder: any AnyArrowArrayBuilder public override init(_ elementType: ArrowType) throws(ArrowError) { - guard case .list(let field) = elementType else { throw .invalid("Expected a field with type .list") } - self.valueBuilder = try ArrowArrayBuilders.loadBuilder( arrowType: field.type ) try super.init(elementType) } - // Overrides the default + // Overrides the protocol extension. + // Swift currently provides no marker for this. public func append(_ values: [Any?]?) 
{ self.bufferBuilder.append(values) if let vals = values { diff --git a/Sources/Arrow/ArrowBuffer.swift b/Sources/Arrow/ArrowBuffer.swift index 5301f2f..7773ccf 100644 --- a/Sources/Arrow/ArrowBuffer.swift +++ b/Sources/Arrow/ArrowBuffer.swift @@ -1,5 +1,5 @@ // Copyright 2025 The Apache Software Foundation -// Copyright 2025 The Columnar-Swift Contributors +// Copyright 2025 The Columnar Swift Contributors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,7 +24,8 @@ public class ArrowBuffer { let isMemoryOwner: Bool init( - length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, + length: UInt, capacity: UInt, + rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true ) { self.length = length diff --git a/Sources/Arrow/ArrowSchema.swift b/Sources/Arrow/ArrowSchema.swift index 1b8d8fa..5549f37 100644 --- a/Sources/Arrow/ArrowSchema.swift +++ b/Sources/Arrow/ArrowSchema.swift @@ -14,7 +14,8 @@ import Foundation -public struct ArrowSchema: Sendable { +// Note this is a reference type to reduce copying. +public final class ArrowSchema: Sendable { public let fields: [ArrowField] public let fieldLookup: [String: Int] init(_ fields: [ArrowField]) { diff --git a/Sources/Arrow/ArrowTable.swift b/Sources/Arrow/ArrowTable.swift index d1daebd..82bcf9d 100644 --- a/Sources/Arrow/ArrowTable.swift +++ b/Sources/Arrow/ArrowTable.swift @@ -1,4 +1,5 @@ // Copyright 2025 The Apache Software Foundation +// Copyright 2025 The Columnar Swift Contributors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -244,6 +245,12 @@ public class RecordBatch { return self } + /// Add a column to the `RecordBatch` builder. + /// - Parameters: + /// - field: The field describing the array. + /// - arrowArray: The array to add to the record batch. 
+ /// - Returns: The `RecordBatch.Builder` with the array appended and the field added to + /// the schema. @discardableResult public func addColumn( _ field: ArrowField, @@ -263,6 +270,17 @@ public class RecordBatch { } } } + // Check nullability matches actual data + let schema = self.schemaBuilder.finish() + for (index, field) in schema.fields.enumerated() { + let column = columns[index] + if !field.isNullable && column.nullCount > 0 { + return .failure( + .invalid( + "non-nullable column '\(field.name)' contains \(column.nullCount) null values." + )) + } + } return .success( RecordBatch(self.schemaBuilder.finish(), columns: self.columns) ) diff --git a/Sources/Arrow/ArrowType.swift b/Sources/Arrow/ArrowType.swift index 9fb2d0d..266f7ba 100644 --- a/Sources/Arrow/ArrowType.swift +++ b/Sources/Arrow/ArrowType.swift @@ -210,6 +210,7 @@ public indirect enum ArrowType: Codable, Sendable, Equatable { /// of binary data in total. case binary /// Opaque binary data of fixed size. + /// /// Enum parameter specifies the number of bytes per value. case fixedSizeBinary(Int32) /// Opaque binary data of variable length and 64-bit offsets. diff --git a/Tests/ArrowTests/RecordBatchTests.swift b/Tests/ArrowTests/RecordBatchTests.swift index 8ed1348..120f633 100644 --- a/Tests/ArrowTests/RecordBatchTests.swift +++ b/Tests/ArrowTests/RecordBatchTests.swift @@ -54,4 +54,21 @@ struct RecordBatchTests { throw error } } + + // Ensure that invalid record batches can't be built. 
+ @Test func schemaNullabilityChecked() throws { + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + stringBuilder.append("test10") + stringBuilder.append(nil) + stringBuilder.append("test33") + let array = try stringBuilder.finish() + + let field = ArrowField(name: "col1", dataType: .utf8, isNullable: false) + let result = RecordBatch.Builder() + .addColumn(field, arrowArray: array) + .finish() + if case .success(_) = result { + Issue.record("Record batch should have rejected null data.") + } + } }