Skip to content

Commit

Permalink
Use an index lookup for O(1) field index access
Browse files Browse the repository at this point in the history
  • Loading branch information
vthemelis committed Dec 21, 2024
1 parent 4274db8 commit 92160cb
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 12 deletions.
26 changes: 14 additions & 12 deletions csharp/src/Apache.Arrow/Schema.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public partial class Schema : IRecordType
private readonly List<Field> _fieldsList;

public ILookup<string, Field> FieldsLookup { get; }
private readonly ILookup<string, int> _fieldsIndexLookup;

public IReadOnlyDictionary<string, string> Metadata { get; }

Expand All @@ -43,17 +44,11 @@ public partial class Schema : IRecordType
/// <summary>
/// Creates a schema from a sequence of fields and optional metadata.
/// </summary>
/// <param name="fields">Fields of the schema; enumerated exactly once and copied into a list.</param>
/// <param name="metadata">Optional key/value metadata; copied into a dictionary when non-null.</param>
/// <exception cref="ArgumentNullException"><paramref name="fields"/> is null.</exception>
public Schema(
    IEnumerable<Field> fields,
    IEnumerable<KeyValuePair<string, string>> metadata)
    : this(
        fields?.ToList() ?? throw new ArgumentNullException(nameof(fields)),
        metadata?.ToDictionary(kv => kv.Key, kv => kv.Value),
        copyCollections: false)
{
    // Intentionally empty: every member is initialized by the chained constructor.
    // Re-assigning the field list here would enumerate `fields` a second time and
    // could leave the name-to-index lookup describing a different list.
}

internal Schema(List<Field> fieldsList, IReadOnlyDictionary<string, string> metadata, bool copyCollections)
Expand All @@ -66,6 +61,10 @@ internal Schema(List<Field> fieldsList, IReadOnlyDictionary<string, string> meta
_fieldsDictionary = FieldsLookup.ToDictionary(g => g.Key, g => g.First());

Metadata = metadata;

_fieldsIndexLookup = _fieldsList
.Select((x, idx) => (Name: x.Name, Index: idx))
.ToLookup(x => x.Name, x => x.Index, StringComparer.CurrentCulture);
}

public Field GetFieldByIndex(int i) => _fieldsList[i];
Expand All @@ -80,15 +79,18 @@ public int GetFieldIndex(string name, StringComparer comparer)

/// <summary>
/// Returns the zero-based index of the first field whose name matches <paramref name="name"/>.
/// </summary>
/// <param name="name">Field name to search for.</param>
/// <param name="comparer">
/// Equality comparer applied to field names. When null, or equal to
/// <see cref="StringComparer.CurrentCulture"/>, the precomputed name-to-index lookup is
/// consulted instead of scanning the field list.
/// </param>
/// <returns>Index of the first matching field.</returns>
/// <exception cref="InvalidOperationException">No field matches <paramref name="name"/>.</exception>
public int GetFieldIndex(string name, IEqualityComparer<string> comparer = default)
{
    if (comparer is null || comparer.Equals(StringComparer.CurrentCulture))
    {
        // Fast path: the lookup is keyed with StringComparer.CurrentCulture, so it can
        // only stand in for that comparer (or for the default). An unknown name yields
        // an empty sequence, in which case we fall through to the throw below.
        foreach (int index in _fieldsIndexLookup[name])
        {
            return index;
        }
    }
    else
    {
        // Any other comparer requires a linear scan over the field names.
        for (int i = 0; i < _fieldsList.Count; i++)
        {
            if (comparer.Equals(_fieldsList[i].Name, name))
            {
                return i;
            }
        }
    }

    // Both paths report a miss the same way.
    throw new InvalidOperationException($"Field '{name}' was not found in the schema.");
}

public Schema RemoveField(int fieldIndex)
Expand Down
68 changes: 68 additions & 0 deletions csharp/test/Apache.Arrow.Tests/SchemaTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Apache.Arrow;
using Apache.Arrow.Types;
using System;
using System.Collections.Generic;
using Xunit;

namespace Apache.Arrow.Tests;

/// <summary>
/// Tests for <see cref="Schema"/> construction and name-based field index retrieval.
/// </summary>
public class SchemaTests
{
    /// <summary>
    /// Comparers fed to the theories below; the null entry exercises the
    /// "no comparer supplied" case.
    /// </summary>
    public static IEnumerable<object[]> StringComparers()
    {
        yield return new object[] { null };
        yield return new object[] { StringComparer.Ordinal };
        yield return new object[] { StringComparer.OrdinalIgnoreCase };
        yield return new object[] { StringComparer.CurrentCulture };
    }

    [Fact]
    public void ThrowsWhenFieldsAreNull() =>
        Assert.Throws<ArgumentNullException>(() => new Schema(null, null));

    [Theory]
    [MemberData(nameof(StringComparers))]
    public void CanRetrieveFieldIndexByName(StringComparer comparer)
    {
        var intField = new Field("f0", Int32Type.Default, true);
        var longField = new Field("f1", Int64Type.Default, true);

        var schema = new Schema([intField, longField], null);

        AssertFirstIndicesAndMissingName(schema, comparer);
    }

    [Theory]
    [MemberData(nameof(StringComparers))]
    public void CanRetrieveFieldIndexByNonUniqueName(StringComparer comparer)
    {
        var intField = new Field("f0", Int32Type.Default, true);
        var longField = new Field("f1", Int64Type.Default, true);

        // Each field appears twice; the first occurrence's index is expected.
        var schema = new Schema([intField, longField, intField, longField], null);

        AssertFirstIndicesAndMissingName(schema, comparer);
    }

    /// <summary>
    /// Checks that "f0" resolves to index 0, "f1" to index 1, and that an unknown
    /// name raises <see cref="InvalidOperationException"/>.
    /// </summary>
    private static void AssertFirstIndicesAndMissingName(Schema schema, StringComparer comparer)
    {
        Assert.Equal(0, schema.GetFieldIndex("f0", comparer));
        Assert.Equal(1, schema.GetFieldIndex("f1", comparer));
        Assert.Throws<InvalidOperationException>(() => schema.GetFieldIndex("nonexistent", comparer));
    }
}

0 comments on commit 92160cb

Please sign in to comment.