From 6fb2c2b3d4f2634ed38bbf6b395e0eafaf8d0551 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 11 Apr 2024 10:33:16 +1200 Subject: [PATCH 01/11] Fix error message wording --- csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index 4e273dbde5690..a37c501072f4b 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -261,7 +261,7 @@ private ArrayData LoadField( if (fieldNullCount < 0) { - throw new InvalidDataException("Null count length must be >= 0"); // TODO:Localize exception message + throw new InvalidDataException("Null count must be >= 0"); // TODO:Localize exception message } int buffers; From f9a75107377bc49128a7b64282c607e59bbebda4 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Thu, 11 Apr 2024 10:18:00 +1200 Subject: [PATCH 02/11] Add test for writing sliced arrays to IPC file --- .../ArrowFileWriterTests.cs | 33 +++++++++++++++++++ .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index 69b8410d030f2..6fe0d77ccc977 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -15,8 +15,11 @@ using Apache.Arrow.Ipc; using System; +using System.Collections.Generic; using System.IO; +using System.Linq; using System.Threading.Tasks; +using Apache.Arrow.Types; using Xunit; namespace Apache.Arrow.Tests @@ -106,6 +109,36 @@ public async Task WritesFooterAlignedMultipleOf8Async() await ValidateRecordBatchFile(stream, originalBatch); } + [Fact] + public async Task WriteSlicedArrays() + { + // Temporarily only test some types + var includedTypes = new HashSet + { + ArrowTypeId.Int32, + }; + var excludedTypes = new HashSet( + Enum.GetValues().Where(typeId => !includedTypes.Contains(typeId))); + + var originalBatch = TestData.CreateSampleRecordBatch(length: 100, excludedTypes: excludedTypes); + const int sliceOffset = 3; + const int sliceLength = 45; + var slicedArrays = originalBatch.Arrays + .Select(array => ArrowArrayFactory.Slice(array, sliceOffset, sliceLength)) + .ToList(); + var slicedBatch = new RecordBatch(originalBatch.Schema, slicedArrays, sliceLength); + + var stream = new MemoryStream(); + var writer = new ArrowFileWriter(stream, slicedBatch.Schema, leaveOpen: true); + + await writer.WriteRecordBatchAsync(slicedBatch); + await writer.WriteEndAsync(); + + stream.Position = 0; + + await ValidateRecordBatchFile(stream, slicedBatch); + } + private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch) { var reader = new ArrowFileReader(stream); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index ceeab92860e6f..a8e0d77171c97 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -306,7 +306,7 @@ private void CompareArrays(PrimitiveArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); From 4989611b62326a47690e953e9de369e7d9628210 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 12 Apr 2024 13:55:56 +1200 Subject: [PATCH 03/11] Fix writing sliced primitive arrays to IPC format --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 94 +++++++++++++------ .../ArrowFileWriterTests.cs | 29 ++++-- .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 +- 3 files changed, 89 insertions(+), 36 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 7b319b03d790c..db2e0457baa73 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -19,6 +19,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.IO; +using System.Runtime.CompilerServices; using System.Threading; using System.Threading.Tasks; using Apache.Arrow.Arrays; @@ -71,10 +72,10 @@ private class ArrowRecordBatchFlatBufferBuilder : { public readonly struct Buffer { - public readonly ArrowBuffer DataBuffer; + public readonly ReadOnlyMemory DataBuffer; public readonly int Offset; - public Buffer(ArrowBuffer buffer, int offset) + public Buffer(ReadOnlyMemory buffer, int offset) { DataBuffer = buffer; Offset = offset; @@ -239,16 +240,62 @@ private void CreateBuffers(BooleanArray array) private void CreateBuffers(PrimitiveArray array) where T : struct { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length)); + } + + private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) + { + if (buffer.IsEmpty) + { + return CreateBuffer(buffer.Memory); + } + + var paddedLength = (int)CalculatePaddedLength(BitUtility.ByteCount(length)); + if (offset % 8 == 0) + { + var byteOffset = offset / 8; + var sliceLength = Math.Min(paddedLength, buffer.Length - byteOffset); + + return CreateBuffer(buffer.Memory.Slice(byteOffset, sliceLength)); + } + else + { + // Need to copy bitmap so the first bit is aligned with the first byte + var memoryOwner = _allocator.Allocate(paddedLength); + var outputSpan = memoryOwner.Memory.Span; + var inputSpan = buffer.Span; + for (var i = 0; i < length; ++i) + { + BitUtility.SetBit(outputSpan, i, BitUtility.GetBit(inputSpan, offset + i)); + } + + return CreateBuffer(memoryOwner.Memory.Slice(0, paddedLength)); + } + } + + private Buffer CreateSlicedBuffer(ArrowBuffer buffer, int offset, int length) + where T : struct + { + var size = Unsafe.SizeOf(); + var byteOffset = offset * size; + var byteLength = length * size; + var paddedLength = (int)CalculatePaddedLength(byteLength); + var sliceLength = Math.Min(paddedLength, buffer.Length - byteOffset); + return CreateBuffer(buffer.Memory.Slice(byteOffset, sliceLength)); } private Buffer CreateBuffer(ArrowBuffer buffer) + { + return CreateBuffer(buffer.Memory); + } + + private Buffer CreateBuffer(ReadOnlyMemory buffer) { int offset = TotalLength; const int UncompressedLengthSize = 8; - ArrowBuffer bufferToWrite; + ReadOnlyMemory bufferToWrite; if (_compressionCodec == null) { bufferToWrite = buffer; @@ -258,7 +305,7 @@ private Buffer CreateBuffer(ArrowBuffer buffer) // Write zero length and skip compression var uncompressedLengthBytes = _allocator.Allocate(UncompressedLengthSize); BinaryPrimitives.WriteInt64LittleEndian(uncompressedLengthBytes.Memory.Span, 0); - bufferToWrite = new ArrowBuffer(uncompressedLengthBytes); + bufferToWrite = uncompressedLengthBytes.Memory; } else { @@ -266,14 +313,14 @@ private Buffer CreateBuffer(ArrowBuffer buffer) // compressed buffers are stored. _compressionStream.Seek(0, SeekOrigin.Begin); _compressionStream.SetLength(0); - _compressionCodec.Compress(buffer.Memory, _compressionStream); + _compressionCodec.Compress(buffer, _compressionStream); if (_compressionStream.Length < buffer.Length) { var newBuffer = _allocator.Allocate((int) _compressionStream.Length + UncompressedLengthSize); BinaryPrimitives.WriteInt64LittleEndian(newBuffer.Memory.Span, buffer.Length); _compressionStream.Seek(0, SeekOrigin.Begin); _compressionStream.ReadFullBuffer(newBuffer.Memory.Slice(UncompressedLengthSize)); - bufferToWrite = new ArrowBuffer(newBuffer); + bufferToWrite = newBuffer.Memory; } else { @@ -281,8 +328,8 @@ private Buffer CreateBuffer(ArrowBuffer buffer) // buffer instead, and indicate this by setting the uncompressed length to -1 var newBuffer = _allocator.Allocate(buffer.Length + UncompressedLengthSize); BinaryPrimitives.WriteInt64LittleEndian(newBuffer.Memory.Span, -1); - buffer.Memory.CopyTo(newBuffer.Memory.Slice(UncompressedLengthSize)); - bufferToWrite = new ArrowBuffer(newBuffer); + buffer.CopyTo(newBuffer.Memory.Slice(UncompressedLengthSize)); + bufferToWrite = newBuffer.Memory; } } @@ -461,8 +508,6 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBatch, CancellationToken cancellationToken = default) { - // TODO: Truncate buffers with extraneous padding / unused capacity - if (!HasWrittenSchema) { await WriteSchemaAsync(Schema, cancellationToken).ConfigureAwait(false); @@ -506,11 +551,11 @@ private long WriteBufferData(IReadOnlyList buffer = buffers[i].DataBuffer; if (buffer.IsEmpty) continue; - WriteBuffer(buffer); + BaseStream.Write(buffer); int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); int padding = paddedLength - buffer.Length; @@ -537,11 +582,11 @@ private async ValueTask WriteBufferDataAsync(IReadOnlyList buffer = buffers[i].DataBuffer; if (buffer.IsEmpty) continue; - await WriteBufferAsync(buffer, cancellationToken).ConfigureAwait(false); + await BaseStream.WriteAsync(buffer, cancellationToken).ConfigureAwait(false); int paddedLength = checked((int)BitUtility.RoundUpToMultipleOf8(buffer.Length)); int padding = paddedLength - buffer.Length; @@ -783,16 +828,6 @@ public async Task WriteEndAsync(CancellationToken cancellationToken = default) } } - private void WriteBuffer(ArrowBuffer arrowBuffer) - { - BaseStream.Write(arrowBuffer.Memory); - } - - private ValueTask WriteBufferAsync(ArrowBuffer arrowBuffer, CancellationToken cancellationToken = default) - { - return BaseStream.WriteAsync(arrowBuffer.Memory, cancellationToken); - } - private protected Offset SerializeSchema(Schema schema) { // Build metadata @@ -1049,13 +1084,18 @@ private async ValueTask WriteIpcMessageLengthAsync(int length, CancellationToken protected int CalculatePadding(long offset, int alignment = 8) { - long result = BitUtility.RoundUpToMultiplePowerOfTwo(offset, alignment) - offset; + long result = CalculatePaddedLength(offset, alignment) - offset; checked { return (int)result; } } + private static long CalculatePaddedLength(long offset, int alignment = 8) + { + return BitUtility.RoundUpToMultiplePowerOfTwo(offset, alignment); + } + private protected void WritePadding(int length) { if (length > 0) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index 6fe0d77ccc977..f47a15fa5f607 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -109,20 +109,33 @@ public async Task WritesFooterAlignedMultipleOf8Async() await ValidateRecordBatchFile(stream, originalBatch); } - [Fact] - public async Task WriteSlicedArrays() + [Theory] + [InlineData(0, 45)] + [InlineData(3, 45)] + [InlineData(16, 45)] + public async Task WriteSlicedArrays(int sliceOffset, int sliceLength) { // Temporarily only test some types - var includedTypes = new HashSet + var excludedTypes = new HashSet { - ArrowTypeId.Int32, + ArrowTypeId.Boolean, + ArrowTypeId.Binary, + ArrowTypeId.BinaryView, + ArrowTypeId.FixedSizedBinary, + ArrowTypeId.List, + ArrowTypeId.ListView, + ArrowTypeId.FixedSizeList, + ArrowTypeId.Map, + ArrowTypeId.Dictionary, + ArrowTypeId.String, + ArrowTypeId.StringView, + ArrowTypeId.Struct, + ArrowTypeId.Decimal128, + ArrowTypeId.Decimal256, + ArrowTypeId.Union, }; - var excludedTypes = new HashSet( - Enum.GetValues().Where(typeId => !includedTypes.Contains(typeId))); var originalBatch = TestData.CreateSampleRecordBatch(length: 100, excludedTypes: excludedTypes); - const int sliceOffset = 3; - const int sliceLength = 45; var slicedArrays = originalBatch.Arrays .Select(array => ArrowArrayFactory.Slice(array, sliceOffset, sliceLength)) .ToList(); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index a8e0d77171c97..1a15c59e2fe20 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -49,7 +49,7 @@ public static async Task VerifyReaderAsync(ArrowStreamReader reader, RecordBatch Assert.Null(await reader.ReadNextRecordBatchAsync()); } - public static void CompareBatches(RecordBatch expectedBatch, RecordBatch actualBatch, bool strictCompare = true) + public static void CompareBatches(RecordBatch expectedBatch, RecordBatch actualBatch, bool strictCompare = false) { SchemaComparer.Compare(expectedBatch.Schema, actualBatch.Schema); Assert.Equal(expectedBatch.Length, actualBatch.Length); From f86f0e62cc7a809347b562a8c6c24978caf2409f Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 12 Apr 2024 14:48:40 +1200 Subject: [PATCH 04/11] Fix BinaryViewArray and FixedSizeBinaryArray offset not accounting for offset --- .../Apache.Arrow/Arrays/BinaryViewArray.cs | 2 +- .../Arrays/FixedSizeBinaryArray.cs | 2 +- .../BinaryViewArrayTests.cs | 40 ++++++++++++++ .../FixedSizeBinaryArrayTests.cs | 52 +++++++++++++++++++ 4 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 csharp/test/Apache.Arrow.Tests/BinaryViewArrayTests.cs create mode 100644 csharp/test/Apache.Arrow.Tests/FixedSizeBinaryArrayTests.cs diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs index 4f62dffd1ddeb..b7c9b07336a5a 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs @@ -322,7 +322,7 @@ public ReadOnlySpan GetBytes(int index, out bool isNull) BinaryView binaryView = Views[index]; if (binaryView.IsInline) { - return ViewsBuffer.Span.Slice(16 * index + 4, binaryView.Length); + return ViewsBuffer.Span.Slice(16 * (Offset + index) + 4, binaryView.Length); } return DataBuffer(binaryView._bufferIndex).Span.Slice(binaryView._bufferOffset, binaryView.Length); diff --git a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs index 0fa7954724f38..9d597ef1624ea 100644 --- a/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/FixedSizeBinaryArray.cs @@ -68,7 +68,7 @@ public ReadOnlySpan GetBytes(int index) } int size = ((FixedSizeBinaryType)Data.DataType).ByteWidth; - return ValueBuffer.Span.Slice(index * size, size); + return ValueBuffer.Span.Slice((Offset + index) * size, size); } int IReadOnlyCollection.Count => Length; diff --git a/csharp/test/Apache.Arrow.Tests/BinaryViewArrayTests.cs b/csharp/test/Apache.Arrow.Tests/BinaryViewArrayTests.cs new file mode 100644 index 0000000000000..7c18a49e96944 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/BinaryViewArrayTests.cs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Xunit; + +namespace Apache.Arrow.Tests; + +public class BinaryViewArrayTests +{ + [Fact] + public void SliceBinaryViewArray() + { + var array = new BinaryViewArray.Builder() + .Append(new byte[] { 0, 1, 2 }) + .Append(new byte[] { 3, 4 }) + .AppendNull() + .Append(new byte[] { 5, 6 }) + .Append(new byte[] { 7, 8 }) + .Build(); + + var slice = (BinaryViewArray)array.Slice(1, 3); + + Assert.Equal(3, slice.Length); + Assert.Equal(new byte[] {3, 4}, slice.GetBytes(0).ToArray()); + Assert.True(slice.GetBytes(1).IsEmpty); + Assert.Equal(new byte[] {5, 6}, slice.GetBytes(2).ToArray()); + } +} diff --git a/csharp/test/Apache.Arrow.Tests/FixedSizeBinaryArrayTests.cs b/csharp/test/Apache.Arrow.Tests/FixedSizeBinaryArrayTests.cs new file mode 100644 index 0000000000000..abc66d6ce9c9d --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/FixedSizeBinaryArrayTests.cs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System.Linq; +using Apache.Arrow.Arrays; +using Apache.Arrow.Types; +using Xunit; + +namespace Apache.Arrow.Tests; + +public class FixedSizeBinaryArrayTests +{ + [Fact] + public void SliceFixedSizeBinaryArray() + { + const int byteWidth = 2; + const int length = 5; + const int nullCount = 1; + + var validityBuffer = new ArrowBuffer.BitmapBuilder() + .AppendRange(true, 2) + .Append(false) + .AppendRange(true, 2) + .Build(); + var dataBuffer = new ArrowBuffer.Builder() + .AppendRange(Enumerable.Range(0, length * byteWidth).Select(i => (byte)i)) + .Build(); + var arrayData = new ArrayData( + new FixedSizeBinaryType(byteWidth), + length, nullCount, 0, new [] {validityBuffer, dataBuffer}); + var array = new FixedSizeBinaryArray(arrayData); + + var slice = (FixedSizeBinaryArray)array.Slice(1, 3); + + Assert.Equal(3, slice.Length); + Assert.Equal(new byte[] {2, 3}, slice.GetBytes(0).ToArray()); + Assert.True(slice.GetBytes(1).IsEmpty); + Assert.Equal(new byte[] {6, 7}, slice.GetBytes(2).ToArray()); + } +} From ec48ee8c9cab745a3f34bd3fd1fccdd429328b0a Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 12 Apr 2024 14:49:11 +1200 Subject: [PATCH 05/11] Implement writing sliced arrays of more types --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 56 ++++++++-------- .../ArrowFileWriterTests.cs | 14 ---- .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 65 ++++++++++++------- 3 files changed, 68 insertions(+), 67 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index db2e0457baa73..ef6faf2ad1d1d 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -128,24 +128,24 @@ public ArrowRecordBatchFlatBufferBuilder( public void Visit(ListArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length + 1)); array.Values.Accept(this); } public void Visit(ListViewArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); - _buffers.Add(CreateBuffer(array.SizesBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.SizesBuffer, array.Offset, array.Length)); array.Values.Accept(this); } public void Visit(FixedSizeListArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); array.Values.Accept(this); } @@ -156,15 +156,15 @@ public void Visit(FixedSizeListArray array) public void Visit(BinaryArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length + 1)); _buffers.Add(CreateBuffer(array.ValueBuffer)); } public void Visit(BinaryViewArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ViewsBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ViewsBuffer, array.Offset, array.Length)); for (int i = 0; i < array.DataBufferCount; i++) { _buffers.Add(CreateBuffer(array.DataBuffer(i))); @@ -175,25 +175,18 @@ public void Visit(BinaryViewArray array) public void Visit(FixedSizeBinaryArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); + var itemSize = ((FixedSizeBinaryType)array.Data.DataType).ByteWidth; + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, itemSize, array.Offset, array.Length)); } - public void Visit(Decimal128Array array) - { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); - } + public void Visit(Decimal128Array array) => Visit(array as FixedSizeBinaryArray); - public void Visit(Decimal256Array array) - { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); - } + public void Visit(Decimal256Array array) => Visit(array as FixedSizeBinaryArray); public void Visit(StructArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); for (int i = 0; i < array.Fields.Count; i++) { @@ -222,8 +215,7 @@ public void Visit(DictionaryArray array) // Dictionary is serialized separately in Dictionary serialization. // We are only interested in indices at this context. - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.IndicesBuffer)); + array.Indices.Accept(this); } public void Visit(NullArray array) @@ -233,8 +225,8 @@ public void Visit(NullArray array) private void CreateBuffers(BooleanArray array) { - _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); - _buffers.Add(CreateBuffer(array.ValueBuffer)); + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBitmapBuffer(array.ValueBuffer, array.Offset, array.Length)); } private void CreateBuffers(PrimitiveArray array) @@ -277,9 +269,13 @@ private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) private Buffer CreateSlicedBuffer(ArrowBuffer buffer, int offset, int length) where T : struct { - var size = Unsafe.SizeOf(); - var byteOffset = offset * size; - var byteLength = length * size; + return CreateSlicedBuffer(buffer, Unsafe.SizeOf(), offset, length); + } + + private Buffer CreateSlicedBuffer(ArrowBuffer buffer, int itemSize, int offset, int length) + { + var byteOffset = offset * itemSize; + var byteLength = length * itemSize; var paddedLength = (int)CalculatePaddedLength(byteLength); var sliceLength = Math.Min(paddedLength, buffer.Length - byteOffset); return CreateBuffer(buffer.Memory.Slice(byteOffset, sliceLength)); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index f47a15fa5f607..04ebacb77ca06 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -118,20 +118,6 @@ public async Task WriteSlicedArrays(int sliceOffset, int sliceLength) // Temporarily only test some types var excludedTypes = new HashSet { - ArrowTypeId.Boolean, - ArrowTypeId.Binary, - ArrowTypeId.BinaryView, - ArrowTypeId.FixedSizedBinary, - ArrowTypeId.List, - ArrowTypeId.ListView, - ArrowTypeId.FixedSizeList, - ArrowTypeId.Map, - ArrowTypeId.Dictionary, - ArrowTypeId.String, - ArrowTypeId.StringView, - ArrowTypeId.Struct, - ArrowTypeId.Decimal128, - ArrowTypeId.Decimal256, ArrowTypeId.Union, }; diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 1a15c59e2fe20..6b85a220c9f6d 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -160,7 +160,7 @@ public void Visit(StructArray array) Assert.Equal(expectedArray.Length, array.Length); Assert.Equal(expectedArray.NullCount, array.NullCount); - Assert.Equal(expectedArray.Offset, array.Offset); + Assert.Equal(0, array.Offset); Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); @@ -220,9 +220,9 @@ private void CompareBinaryArrays(BinaryArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -252,9 +252,9 @@ private void CompareVariadicArrays(BinaryViewArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); Assert.True(expectedArray.Views.SequenceEqual(actualArray.Views)); @@ -277,9 +277,9 @@ private void CompareArrays(FixedSizeBinaryArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -308,7 +308,7 @@ private void CompareArrays(PrimitiveArray actualArray) Assert.Equal(expectedArray.NullCount, actualArray.NullCount); Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -338,9 +338,9 @@ private void CompareArrays(BooleanArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -365,9 +365,9 @@ private void CompareArrays(ListArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -375,8 +375,9 @@ private void CompareArrays(ListArray actualArray) } else { + int offsetsStart = (expectedArray.Offset) * sizeof(int); int offsetsLength = (expectedArray.Length + 1) * sizeof(int); - Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength))); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(offsetsStart, offsetsLength).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, offsetsLength))); } actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); @@ -391,9 +392,9 @@ private void CompareArrays(ListViewArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); if (_strictCompare) { @@ -402,9 +403,10 @@ private void CompareArrays(ListViewArray actualArray) } else { + int start = expectedArray.Offset * sizeof(int); int length = expectedArray.Length * sizeof(int); - Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(0, length).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, length))); - Assert.True(expectedArray.SizesBuffer.Span.Slice(0, length).SequenceEqual(actualArray.SizesBuffer.Span.Slice(0, length))); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(start, length).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, length))); + Assert.True(expectedArray.SizesBuffer.Span.Slice(start, length).SequenceEqual(actualArray.SizesBuffer.Span.Slice(0, length))); } actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); @@ -419,23 +421,28 @@ private void CompareArrays(FixedSizeListArray actualArray) Assert.Equal(expectedArray.Length, actualArray.Length); Assert.Equal(expectedArray.NullCount, actualArray.NullCount); - Assert.Equal(expectedArray.Offset, actualArray.Offset); + Assert.Equal(0, actualArray.Offset); - CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } - private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer expectedValidityBuffer, ArrowBuffer actualValidityBuffer) + private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer expectedValidityBuffer, int expectedBufferOffset, ArrowBuffer actualValidityBuffer) { if (_strictCompare) { Assert.True(expectedValidityBuffer.Span.SequenceEqual(actualValidityBuffer.Span)); } - else if (nullCount != 0 && arrayLength > 0) + else if (actualValidityBuffer.IsEmpty) + { + Assert.True(nullCount == 0 || arrayLength == 0); + } + else if (expectedBufferOffset % 8 == 0) { int validityBitmapByteCount = BitUtility.ByteCount(arrayLength); - ReadOnlySpan expectedSpanPartial = expectedValidityBuffer.Span.Slice(0, validityBitmapByteCount - 1); + int byteOffset = BitUtility.ByteCount(expectedBufferOffset); + ReadOnlySpan expectedSpanPartial = expectedValidityBuffer.Span.Slice(byteOffset, validityBitmapByteCount - 1); ReadOnlySpan actualSpanPartial = actualValidityBuffer.Span.Slice(0, validityBitmapByteCount - 1); // Compare the first validityBitmapByteCount - 1 bytes @@ -445,7 +452,7 @@ private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer e // Compare the last byte bitwise (because there is no guarantee about the value of // bits outside the range [0, arrayLength]) - ReadOnlySpan expectedSpanFull = expectedValidityBuffer.Span.Slice(0, validityBitmapByteCount); + ReadOnlySpan expectedSpanFull = expectedValidityBuffer.Span.Slice(byteOffset, validityBitmapByteCount); ReadOnlySpan actualSpanFull = actualValidityBuffer.Span.Slice(0, validityBitmapByteCount); for (int i = 8 * (validityBitmapByteCount - 1); i < arrayLength; i++) { @@ -454,6 +461,18 @@ private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer e string.Format("Bit at index {0}/{1} is not equal", i, arrayLength)); } } + else + { + // Have to compare all values bitwise + var expectedSpan = expectedValidityBuffer.Span; + var actualSpan = actualValidityBuffer.Span; + for (int i = 0; i < arrayLength; i++) + { + Assert.True( + BitUtility.GetBit(expectedSpan, expectedBufferOffset + i) == BitUtility.GetBit(actualSpan, i), + string.Format("Bit at index {0}/{1} is not equal", i, arrayLength)); + } + } } } } From 82cac8df3549058b2ee3d61d07ac96c8ad2cf15d Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 12 Apr 2024 15:27:38 +1200 Subject: [PATCH 06/11] Implement writing sliced union arrays --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 24 +++++++++++++++---- .../ArrowFileWriterTests.cs | 8 +------ .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index ef6faf2ad1d1d..b970103ca1c2e 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -196,16 +196,17 @@ public void Visit(StructArray array) public void Visit(UnionArray array) { - _buffers.Add(CreateBuffer(array.TypeBuffer)); + _buffers.Add(CreateSlicedBuffer(array.TypeBuffer, array.Offset, array.Length)); ArrowBuffer? offsets = (array as DenseUnionArray)?.ValueOffsetBuffer; if (offsets != null) { - _buffers.Add(CreateBuffer(offsets.Value)); + _buffers.Add(CreateSlicedBuffer(offsets.Value, array.Offset, array.Length)); } for (int i = 0; i < array.Fields.Count; i++) { + // Sparse union arrays will be sliced if required when accessed array.Fields[i].Accept(this); } } @@ -413,10 +414,23 @@ private void CreateSelfAndChildrenFieldNodes(ArrayData data) { if (data.DataType is NestedType) { - // flatbuffer struct vectors have to be created in reverse order - for (int i = data.Children.Length - 1; i >= 0; i--) + // TODO: Tidy this up somehow, check other types, add more tests + if (data.DataType is UnionType {Mode: UnionMode.Sparse} || data.DataType is StructType) { - CreateSelfAndChildrenFieldNodes(data.Children[i]); + for (int i = data.Children.Length - 1; i >= 0; i--) + { + var child = data.Children[i]; + var slicedChild = child.Slice(data.Offset, data.Length); + CreateSelfAndChildrenFieldNodes(slicedChild); + } + } + else + { + // flatbuffer struct vectors have to be created in reverse order + for (int i = data.Children.Length - 1; i >= 0; i--) + { + CreateSelfAndChildrenFieldNodes(data.Children[i]); + } } } Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.GetNullCount()); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index 04ebacb77ca06..4bc178da28629 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -115,13 +115,7 @@ public async Task WritesFooterAlignedMultipleOf8Async() [InlineData(16, 45)] public async Task WriteSlicedArrays(int sliceOffset, int sliceLength) { - // Temporarily only test some types - var excludedTypes = new HashSet - { - ArrowTypeId.Union, - }; - - var originalBatch = TestData.CreateSampleRecordBatch(length: 100, excludedTypes: excludedTypes); + var originalBatch = TestData.CreateSampleRecordBatch(length: 100); var slicedArrays = originalBatch.Arrays .Select(array => ArrowArrayFactory.Slice(array, sliceOffset, sliceLength)) .ToList(); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 6b85a220c9f6d..8b2fce5d1e6ff 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -178,7 +178,7 @@ public void Visit(UnionArray array) Assert.Equal(expectedArray.Mode, array.Mode); Assert.Equal(expectedArray.Length, array.Length); Assert.Equal(expectedArray.NullCount, array.NullCount); - Assert.Equal(expectedArray.Offset, array.Offset); + Assert.Equal(0, array.Offset); Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); From 2c96ed45249717b0c7fb2157937d2f3e6f35a033 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 15 Apr 2024 11:20:57 +1200 Subject: [PATCH 07/11] Fix FixedSizeList --- csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 12 +++++++++++- .../test/Apache.Arrow.Tests/ArrowReaderVerifier.cs | 5 ++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index b970103ca1c2e..48d55244c5a20 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -147,7 +147,11 @@ public void Visit(FixedSizeListArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); - array.Values.Accept(this); + var listSize = ((FixedSizeListType)array.Data.DataType).ListSize; + var valuesSlice = + ArrowArrayFactory.Slice(array.Values, array.Offset * listSize, array.Length * listSize); + + valuesSlice.Accept(this); } public void Visit(StringArray array) => Visit(array as BinaryArray); @@ -424,6 +428,12 @@ private void CreateSelfAndChildrenFieldNodes(ArrayData data) CreateSelfAndChildrenFieldNodes(slicedChild); } } + else if (data.DataType is FixedSizeListType fixedSizeListType) + { + var listSize = fixedSizeListType.ListSize; + var slicedChild = data.Children[0].Slice(data.Offset * listSize, data.Length * listSize); + CreateSelfAndChildrenFieldNodes(slicedChild); + } else { // flatbuffer struct vectors have to be created in reverse order diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 8b2fce5d1e6ff..b8ac863141d46 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -425,7 +425,10 @@ private void CompareArrays(FixedSizeListArray actualArray) CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, expectedArray.Offset, actualArray.NullBitmapBuffer); - actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); + var listSize = ((FixedSizeListType)expectedArray.Data.DataType).ListSize; + var expectedValuesSlice = ArrowArrayFactory.Slice( + expectedArray.Values, expectedArray.Offset * listSize, expectedArray.Length * listSize); + actualArray.Values.Accept(new ArrayComparer(expectedValuesSlice, _strictCompare)); } private void CompareValidityBuffer(int nullCount, int arrayLength, ArrowBuffer expectedValidityBuffer, int expectedBufferOffset, ArrowBuffer actualValidityBuffer) From a56b9977f79db4c248555cf83f41aa91d352b9bf Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 15 Apr 2024 12:34:27 +1200 Subject: [PATCH 08/11] Refactor writer logic to keep field node and buffer logic together --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 192 +++++++----------- 1 file changed, 79 insertions(+), 113 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 48d55244c5a20..2952d2888b055 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -70,6 +70,18 @@ private class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor { + public readonly struct FieldNode + { + public readonly int Length; + public readonly int NullCount; + + public FieldNode(int length, int nullCount) + { + Length = length; + NullCount = nullCount; + } + } + public readonly struct Buffer { public readonly ReadOnlyMemory DataBuffer; @@ -82,11 +94,13 @@ public Buffer(ReadOnlyMemory buffer, int offset) } } + private readonly List _fieldNodes; private readonly List _buffers; private readonly ICompressionCodec _compressionCodec; private readonly MemoryAllocator _allocator; private readonly MemoryStream _compressionStream; + public IReadOnlyList FieldNodes => _fieldNodes; public IReadOnlyList Buffers => _buffers; public List VariadicCounts { get; private set; } @@ -98,40 +112,60 @@ public ArrowRecordBatchFlatBufferBuilder( _compressionCodec = compressionCodec; _compressionStream = compressionStream; _allocator = allocator; + _fieldNodes = new List(); _buffers = new List(); TotalLength = 0; } - public void Visit(Int8Array array) => CreateBuffers(array); - public void Visit(Int16Array array) => CreateBuffers(array); - public void Visit(Int32Array array) => CreateBuffers(array); - public void Visit(Int64Array array) => CreateBuffers(array); - public void Visit(UInt8Array array) => CreateBuffers(array); - public void Visit(UInt16Array array) => CreateBuffers(array); - public void Visit(UInt32Array array) => CreateBuffers(array); - public void Visit(UInt64Array array) => CreateBuffers(array); + public void VisitArray(IArrowArray array) + { + _fieldNodes.Add(new FieldNode(array.Length, array.NullCount)); + + array.Accept(this); + } + + public void Visit(Int8Array array) => VisitPrimitiveArray(array); + public void Visit(Int16Array array) => VisitPrimitiveArray(array); + public void Visit(Int32Array array) => VisitPrimitiveArray(array); + public void Visit(Int64Array array) => VisitPrimitiveArray(array); + public void Visit(UInt8Array array) => VisitPrimitiveArray(array); + public void Visit(UInt16Array array) => VisitPrimitiveArray(array); + public void Visit(UInt32Array array) => VisitPrimitiveArray(array); + public void Visit(UInt64Array array) => VisitPrimitiveArray(array); #if NET5_0_OR_GREATER - public void Visit(HalfFloatArray array) => CreateBuffers(array); + public void Visit(HalfFloatArray array) => VisitPrimitiveArray(array); #endif - public void Visit(FloatArray array) => CreateBuffers(array); - public void Visit(DoubleArray array) => CreateBuffers(array); - public void Visit(TimestampArray array) => CreateBuffers(array); - public void Visit(BooleanArray array) => CreateBuffers(array); - public void Visit(Date32Array array) => CreateBuffers(array); - public void Visit(Date64Array array) => CreateBuffers(array); - public void Visit(Time32Array array) => CreateBuffers(array); - public void Visit(Time64Array array) => CreateBuffers(array); - public void Visit(DurationArray array) => CreateBuffers(array); - public void Visit(YearMonthIntervalArray array) => CreateBuffers(array); - public void Visit(DayTimeIntervalArray array) => CreateBuffers(array); - public void Visit(MonthDayNanosecondIntervalArray array) => CreateBuffers(array); + public void Visit(FloatArray array) => VisitPrimitiveArray(array); + public void Visit(DoubleArray array) => VisitPrimitiveArray(array); + public void Visit(TimestampArray array) => VisitPrimitiveArray(array); + public void Visit(Date32Array array) => VisitPrimitiveArray(array); + public void Visit(Date64Array array) => VisitPrimitiveArray(array); + public void Visit(Time32Array array) => VisitPrimitiveArray(array); + public void Visit(Time64Array array) => VisitPrimitiveArray(array); + public void Visit(DurationArray array) => VisitPrimitiveArray(array); + public void Visit(YearMonthIntervalArray array) => VisitPrimitiveArray(array); + public void Visit(DayTimeIntervalArray array) => VisitPrimitiveArray(array); + public void Visit(MonthDayNanosecondIntervalArray array) => VisitPrimitiveArray(array); + + private void VisitPrimitiveArray(PrimitiveArray array) + where T : struct + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length)); + } + + public void Visit(BooleanArray array) + { + _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); + _buffers.Add(CreateBitmapBuffer(array.ValueBuffer, array.Offset, array.Length)); + } public void Visit(ListArray array) { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length + 1)); - array.Values.Accept(this); + VisitArray(array.Values); } public void Visit(ListViewArray array) @@ -140,7 +174,7 @@ public void Visit(ListViewArray array) _buffers.Add(CreateSlicedBuffer(array.ValueOffsetsBuffer, array.Offset, array.Length)); _buffers.Add(CreateSlicedBuffer(array.SizesBuffer, array.Offset, array.Length)); - array.Values.Accept(this); + VisitArray(array.Values); } public void Visit(FixedSizeListArray array) @@ -151,7 +185,7 @@ public void Visit(FixedSizeListArray array) var valuesSlice = ArrowArrayFactory.Slice(array.Values, array.Offset * listSize, array.Length * listSize); - valuesSlice.Accept(this); + VisitArray(valuesSlice); } public void Visit(StringArray array) => Visit(array as BinaryArray); @@ -194,7 +228,8 @@ public void Visit(StructArray array) for (int i = 0; i < array.Fields.Count; i++) { - array.Fields[i].Accept(this); + // Fields property accessor handles slicing field arrays if required + VisitArray(array.Fields[i]); } } @@ -210,8 +245,8 @@ public void Visit(UnionArray array) for (int i = 0; i < array.Fields.Count; i++) { - // Sparse union arrays will be sliced if required when accessed - array.Fields[i].Accept(this); + // Fields property accessor handles slicing field arrays for sparse union arrays if required + VisitArray(array.Fields[i]); } } @@ -228,19 +263,6 @@ public void Visit(NullArray array) // There are no buffers for a NullArray } - private void CreateBuffers(BooleanArray array) - { - _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); - _buffers.Add(CreateBitmapBuffer(array.ValueBuffer, array.Offset, array.Length)); - } - - private void CreateBuffers(PrimitiveArray array) - where T : struct - { - _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); - _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length)); - } - private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) { if (buffer.IsEmpty) @@ -414,48 +436,6 @@ public ArrowStreamWriter(Stream baseStream, Schema schema, bool leaveOpen, IpcOp } } - private void CreateSelfAndChildrenFieldNodes(ArrayData data) - { - if (data.DataType is NestedType) - { - // TODO: Tidy this up somehow, check other types, add more tests - if (data.DataType is UnionType {Mode: UnionMode.Sparse} || data.DataType is StructType) - { - for (int i = data.Children.Length - 1; i >= 0; i--) - { - var child = data.Children[i]; - var slicedChild = child.Slice(data.Offset, data.Length); - CreateSelfAndChildrenFieldNodes(slicedChild); - } - } - else if (data.DataType is FixedSizeListType fixedSizeListType) - { - var listSize = fixedSizeListType.ListSize; - var slicedChild = data.Children[0].Slice(data.Offset * listSize, data.Length * listSize); - CreateSelfAndChildrenFieldNodes(slicedChild); - } - else - { - // flatbuffer struct vectors have to be created in reverse order - for (int i = data.Children.Length - 1; i >= 0; i--) - { - CreateSelfAndChildrenFieldNodes(data.Children[i]); - } - } - } - Flatbuf.FieldNode.CreateFieldNode(Builder, data.Length, data.GetNullCount()); - } - - private static int CountAllNodes(IReadOnlyList fields) - { - int count = 0; - foreach (Field arrowArray in fields) - { - CountSelfAndChildrenNodes(arrowArray.DataType, ref count); - } - return count; - } - private Offset GetBodyCompression() { if (_options.CompressionCodec == null) @@ -473,18 +453,6 @@ private static int CountAllNodes(IReadOnlyList fields) Builder, compressionType, Flatbuf.BodyCompressionMethod.BUFFER); } - private static void CountSelfAndChildrenNodes(IArrowType type, ref int count) - { - if (type is NestedType nestedType) - { - foreach (Field childField in nestedType.Fields) - { - CountSelfAndChildrenNodes(childField.DataType, ref count); - } - } - count++; - } - private protected void WriteRecordBatchInternal(RecordBatch recordBatch) { // TODO: Truncate buffers with extraneous padding / unused capacity @@ -636,22 +604,6 @@ private Tuple Pre { Builder.Clear(); - // Serialize field nodes - - int fieldCount = fields.Count; - - Flatbuf.RecordBatch.StartNodesVector(Builder, CountAllNodes(fields)); - - // flatbuffer struct vectors have to be created in reverse order - for (int i = fieldCount - 1; i >= 0; i--) - { - CreateSelfAndChildrenFieldNodes(arrays[i].Data); - } - - VectorOffset fieldNodesVectorOffset = Builder.EndVector(); - - // Serialize buffers - // CompressionCodec can be disposed after all data is visited by the builder, // and doesn't need to be alive for the full lifetime of the ArrowRecordBatchFlatBufferBuilder using var compressionCodec = _options.CompressionCodec.HasValue @@ -659,20 +611,34 @@ private Tuple Pre : null; var recordBatchBuilder = new ArrowRecordBatchFlatBufferBuilder(compressionCodec, _allocator, _compressionStream); - for (int i = 0; i < fieldCount; i++) + + // Visit all arrays recursively + for (int i = 0; i < fields.Count; i++) { IArrowArray fieldArray = arrays[i]; - fieldArray.Accept(recordBatchBuilder); + recordBatchBuilder.VisitArray(fieldArray); } + // Serialize field nodes + IReadOnlyList fieldNodes = recordBatchBuilder.FieldNodes; + Flatbuf.RecordBatch.StartNodesVector(Builder, fieldNodes.Count); + + // flatbuffer struct vectors have to be created in reverse order + for (int i = fieldNodes.Count - 1; i >= 0; i--) + { + Flatbuf.FieldNode.CreateFieldNode(Builder, fieldNodes[i].Length, fieldNodes[i].NullCount); + } + + VectorOffset fieldNodesVectorOffset = Builder.EndVector(); + VectorOffset variadicCountOffset = default; if (recordBatchBuilder.VariadicCounts != null) { variadicCountOffset = Flatbuf.RecordBatch.CreateVariadicCountsVectorBlock(Builder, recordBatchBuilder.VariadicCounts.ToArray()); } + // Serialize buffers IReadOnlyList buffers = recordBatchBuilder.Buffers; - Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count); // flatbuffer struct vectors have to be created in reverse order From ef2d004efd96524fbcf16bd532989d972ac39856 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 15 Apr 2024 13:00:16 +1200 Subject: [PATCH 09/11] Fix padding handling --- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 27 ++++++++++++------- .../ArrowFileWriterTests.cs | 8 +++--- .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 2 +- 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 2952d2888b055..6127c5a662dfe 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -270,7 +270,7 @@ private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) return CreateBuffer(buffer.Memory); } - var paddedLength = (int)CalculatePaddedLength(BitUtility.ByteCount(length)); + var paddedLength = CalculatePaddedBufferLength(BitUtility.ByteCount(length)); if (offset % 8 == 0) { var byteOffset = offset / 8; @@ -289,7 +289,7 @@ private Buffer CreateBitmapBuffer(ArrowBuffer buffer, int offset, int length) BitUtility.SetBit(outputSpan, i, BitUtility.GetBit(inputSpan, offset + i)); } - return CreateBuffer(memoryOwner.Memory.Slice(0, paddedLength)); + return CreateBuffer(memoryOwner.Memory); } } @@ -301,11 +301,16 @@ private Buffer CreateSlicedBuffer(ArrowBuffer buffer, int offset, int length) private Buffer CreateSlicedBuffer(ArrowBuffer buffer, int itemSize, int offset, int length) { - var byteOffset = offset * itemSize; var byteLength = length * itemSize; - var paddedLength = (int)CalculatePaddedLength(byteLength); - var sliceLength = Math.Min(paddedLength, buffer.Length - byteOffset); - return CreateBuffer(buffer.Memory.Slice(byteOffset, sliceLength)); + var paddedLength = CalculatePaddedBufferLength(byteLength); + if (offset != 0 || paddedLength < buffer.Length) + { + var byteOffset = offset * itemSize; + var sliceLength = Math.Min(paddedLength, buffer.Length - byteOffset); + return CreateBuffer(buffer.Memory.Slice(byteOffset, sliceLength)); + } + + return CreateBuffer(buffer.Memory); } private Buffer CreateBuffer(ArrowBuffer buffer) @@ -1070,16 +1075,20 @@ private async ValueTask WriteIpcMessageLengthAsync(int length, CancellationToken protected int CalculatePadding(long offset, int alignment = 8) { - long result = CalculatePaddedLength(offset, alignment) - offset; + long result = BitUtility.RoundUpToMultiplePowerOfTwo(offset, alignment) - offset; checked { return (int)result; } } - private static long CalculatePaddedLength(long offset, int alignment = 8) + private static int CalculatePaddedBufferLength(int length) { - return BitUtility.RoundUpToMultiplePowerOfTwo(offset, alignment); + long result = BitUtility.RoundUpToMultiplePowerOfTwo(length, MemoryAllocator.DefaultAlignment); + checked + { + return (int)result; + } } private protected void WritePadding(int length) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs index 4bc178da28629..faf650973d64c 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowFileWriterTests.cs @@ -129,16 +129,18 @@ public async Task WriteSlicedArrays(int sliceOffset, int sliceLength) stream.Position = 0; - await ValidateRecordBatchFile(stream, slicedBatch); + // Disable strict comparison because we don't expect buffers to match exactly + // due to writing slices of buffers, and instead need to compare array values + await ValidateRecordBatchFile(stream, slicedBatch, strictCompare: false); } - private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch) + private async Task ValidateRecordBatchFile(Stream stream, RecordBatch recordBatch, bool strictCompare = true) { var reader = new ArrowFileReader(stream); int count = await reader.RecordBatchCountAsync(); Assert.Equal(1, count); RecordBatch readBatch = await reader.ReadRecordBatchAsync(0); - ArrowReaderVerifier.CompareBatches(recordBatch, readBatch); + ArrowReaderVerifier.CompareBatches(recordBatch, readBatch, strictCompare); } /// diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index b8ac863141d46..79c47f8a461c6 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -49,7 +49,7 @@ public static async Task VerifyReaderAsync(ArrowStreamReader reader, RecordBatch Assert.Null(await reader.ReadNextRecordBatchAsync()); } - public static void CompareBatches(RecordBatch expectedBatch, RecordBatch actualBatch, bool strictCompare = false) + public static void CompareBatches(RecordBatch expectedBatch, RecordBatch actualBatch, bool strictCompare = true) { SchemaComparer.Compare(expectedBatch.Schema, actualBatch.Schema); Assert.Equal(expectedBatch.Length, actualBatch.Length); From 40fd78f276de102716dcaef7156205bebf14b804 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 15 Apr 2024 15:35:54 +1200 Subject: [PATCH 10/11] Add StreamWriter tests --- .../ArrowStreamWriterTests.cs | 46 +++++++++++++++---- 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs index c4c0b6ec9ff21..db8369fa618e9 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowStreamWriterTests.cs @@ -203,7 +203,37 @@ public async Task WriteBatchWithNullsAsync() await TestRoundTripRecordBatchAsync(originalBatch); } - private static void TestRoundTripRecordBatches(List originalBatches, IpcOptions options = null) + [Theory] + [InlineData(0, 45)] + [InlineData(3, 45)] + [InlineData(16, 45)] + public void WriteSlicedArrays(int sliceOffset, int sliceLength) + { + var originalBatch = TestData.CreateSampleRecordBatch(length: 100); + var slicedArrays = originalBatch.Arrays + .Select(array => ArrowArrayFactory.Slice(array, sliceOffset, sliceLength)) + .ToList(); + var slicedBatch = new RecordBatch(originalBatch.Schema, slicedArrays, sliceLength); + + TestRoundTripRecordBatch(slicedBatch, strictCompare: false); + } + + [Theory] + [InlineData(0, 45)] + [InlineData(3, 45)] + [InlineData(16, 45)] + public async Task WriteSlicedArraysAsync(int sliceOffset, int sliceLength) + { + var originalBatch = TestData.CreateSampleRecordBatch(length: 100); + var slicedArrays = originalBatch.Arrays + .Select(array => ArrowArrayFactory.Slice(array, sliceOffset, sliceLength)) + .ToList(); + var slicedBatch = new RecordBatch(originalBatch.Schema, slicedArrays, sliceLength); + + await TestRoundTripRecordBatchAsync(slicedBatch, strictCompare: false); + } + + private static void TestRoundTripRecordBatches(List originalBatches, IpcOptions options = null, bool strictCompare = true) { using (MemoryStream stream = new MemoryStream()) { @@ -223,13 +253,13 @@ private static void TestRoundTripRecordBatches(List originalBatches foreach (RecordBatch originalBatch in originalBatches) { RecordBatch newBatch = reader.ReadNextRecordBatch(); - ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch, strictCompare: strictCompare); } } } } - private static async Task TestRoundTripRecordBatchesAsync(List originalBatches, IpcOptions options = null) + private static async Task TestRoundTripRecordBatchesAsync(List originalBatches, IpcOptions options = null, bool strictCompare = true) { using (MemoryStream stream = new MemoryStream()) { @@ -249,20 +279,20 @@ private static async Task TestRoundTripRecordBatchesAsync(List orig foreach (RecordBatch originalBatch in originalBatches) { RecordBatch newBatch = reader.ReadNextRecordBatch(); - ArrowReaderVerifier.CompareBatches(originalBatch, newBatch); + ArrowReaderVerifier.CompareBatches(originalBatch, newBatch, strictCompare: strictCompare); } } } } - private static void TestRoundTripRecordBatch(RecordBatch originalBatch, IpcOptions options = null) + private static void TestRoundTripRecordBatch(RecordBatch originalBatch, IpcOptions options = null, bool strictCompare = true) { - TestRoundTripRecordBatches(new List { originalBatch }, options); + TestRoundTripRecordBatches(new List { originalBatch }, options, strictCompare: strictCompare); } - private static async Task TestRoundTripRecordBatchAsync(RecordBatch originalBatch, IpcOptions options = null) + private static async Task TestRoundTripRecordBatchAsync(RecordBatch originalBatch, IpcOptions options = null, bool strictCompare = true) { - await TestRoundTripRecordBatchesAsync(new List { originalBatch }, options); + await TestRoundTripRecordBatchesAsync(new List { originalBatch }, options, strictCompare: strictCompare); } [Fact] From 5c23c136fd95154fae19eb049262902faaf6bf00 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 15 Apr 2024 16:35:34 +1200 Subject: [PATCH 11/11] More thorough union array testing --- .../ArrowArrayConcatenatorTests.cs | 6 ++++ .../Apache.Arrow.Tests/ArrowReaderVerifier.cs | 32 +++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index 25ef289f0dc25..700de58adb8c1 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -29,6 +29,12 @@ public void TestStandardCases() { foreach ((List testTargetArrayList, IArrowArray expectedArray) in GenerateTestData()) { + if (expectedArray is UnionArray) + { + // Union array concatenation is incorrect. See https://github.com/apache/arrow/issues/41198 + continue; + } + IArrowArray actualArray = ArrowArrayConcatenator.Concatenate(testTargetArrayList); ArrowReaderVerifier.CompareArrays(expectedArray, actualArray); } diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 79c47f8a461c6..07c8aa3f56b3b 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -182,6 +182,38 @@ public void Visit(UnionArray array) Assert.Equal(expectedArray.Data.Children.Length, array.Data.Children.Length); Assert.Equal(expectedArray.Fields.Count, array.Fields.Count); + if (_strictCompare) + { + Assert.True(expectedArray.TypeBuffer.Span.SequenceEqual(array.TypeBuffer.Span)); + } + else + { + for (int i = 0; i < expectedArray.Length; i++) + { + Assert.Equal(expectedArray.TypeIds[i], array.TypeIds[i]); + } + } + + if (_expectedArray is DenseUnionArray expectedDenseArray) + { + Assert.IsAssignableFrom(array); + var denseArray = array as DenseUnionArray; + Assert.NotNull(denseArray); + + if (_strictCompare) + { + Assert.True(expectedDenseArray.ValueOffsetBuffer.Span.SequenceEqual(denseArray.ValueOffsetBuffer.Span)); + } + else + { + for (int i = 0; i < expectedDenseArray.Length; i++) + { + Assert.Equal( + expectedDenseArray.ValueOffsets[i], denseArray.ValueOffsets[i]); + } + } + } + for (int i = 0; i < array.Fields.Count; i++) { array.Fields[i].Accept(new ArrayComparer(expectedArray.Fields[i], _strictCompare));