From 0fa210d707e69dbe8346809b7041fd3b058c6656 Mon Sep 17 00:00:00 2001 From: Haytam Zanid <34218324+zHaytam@users.noreply.github.com> Date: Mon, 13 Jan 2020 09:17:30 +0100 Subject: [PATCH] Add Apply method to PrimitiveDataFrameColumn (#2807) * Add Apply method to PrimitiveDataFrameColumn and its container * Add TestApply test * Remove unused df variable in DataFrameTests * Add xml doc comments to Apply method --- .../PrimitiveColumnContainer.cs | 34 +++++++++++++++++++ .../PrimitiveDataFrameColumn.cs | 13 +++++++ .../DataFrameTests.cs | 17 ++++++++++ 3 files changed, 64 insertions(+) diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs index ad94de690e..767200a818 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs @@ -251,6 +251,40 @@ public void ApplyElementwise(Func func) } } + public void Apply(Func func, PrimitiveColumnContainer resultContainer) + where TResult : unmanaged + { + for (int b = 0; b < Buffers.Count; b++) + { + ReadOnlyDataFrameBuffer buffer = Buffers[b]; + long prevLength = checked(Buffers[0].Length * b); + DataFrameBuffer mutableBuffer = DataFrameBuffer.GetMutableBuffer(buffer); + Buffers[b] = mutableBuffer; + Span span = mutableBuffer.Span; + DataFrameBuffer mutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(NullBitMapBuffers[b]); + NullBitMapBuffers[b] = mutableNullBitMapBuffer; + Span nullBitMapSpan = mutableNullBitMapBuffer.Span; + + ReadOnlyDataFrameBuffer resultBuffer = resultContainer.Buffers[b]; + long resultPrevLength = checked(resultContainer.Buffers[0].Length * b); + DataFrameBuffer resultMutableBuffer = DataFrameBuffer.GetMutableBuffer(resultBuffer); + resultContainer.Buffers[b] = resultMutableBuffer; + Span resultSpan = resultMutableBuffer.Span; + DataFrameBuffer resultMutableNullBitMapBuffer = DataFrameBuffer.GetMutableBuffer(resultContainer.NullBitMapBuffers[b]); + 
resultContainer.NullBitMapBuffers[b] = resultMutableNullBitMapBuffer; + Span resultNullBitMapSpan = resultMutableNullBitMapBuffer.Span; + + for (int i = 0; i < span.Length; i++) + { + long curIndex = i + prevLength; + bool isValid = IsValid(nullBitMapSpan, i); + TResult? value = func(isValid ? span[i] : default(T?)); + resultSpan[i] = value.GetValueOrDefault(); + SetValidityBit(resultNullBitMapSpan, i, value != null); + } + } + } + // Faster to use when we already have a span since it avoids indexing public bool IsValid(ReadOnlySpan bitMapBufferSpan, int index) { diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs index f44202ed44..cf6e6d2b8f 100644 --- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs @@ -487,6 +487,19 @@ public override Dictionary> GroupColumnValues() public void ApplyElementwise(Func func) => _columnContainer.ApplyElementwise(func); + /// + /// Applies a function to all the values + /// + /// The new column's type + /// The function to apply + /// A new PrimitiveDataFrameColumn containing the new values + public PrimitiveDataFrameColumn Apply(Func func) where TResult : unmanaged + { + var resultColumn = new PrimitiveDataFrameColumn("Result", Length); + _columnContainer.Apply(func, resultColumn._columnContainer); + return resultColumn; + } + /// /// Clips values beyond the specified thresholds /// diff --git a/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 8fc3eca52b..0de7e10e46 100644 --- a/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/tests/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -1946,6 +1946,7 @@ public void TestMutationOnRows() } } } + [Fact] public void TestAppendRow() { @@ -2027,5 +2028,21 @@ public void TestAppendEmptyValue() Assert.Equal(13, df.Rows.Count); Assert.Equal(1, df.Columns[2].NullCount); } + 
+        [Fact]
+        public void TestApply()
+        {
+            int[] values = { 1, 2, 3, 4, 5 };
+            var col = new PrimitiveDataFrameColumn<int>("Ints", values);
+            PrimitiveDataFrameColumn<double> newCol = col.Apply(i => i + 0.5d);
+
+            Assert.Equal(values.Length, newCol.Length);
+
+            for (int i = 0; i < newCol.Length; i++)
+            {
+                Assert.Equal(values[i], col[i]); // Make sure values didn't change
+                Assert.Equal(values[i] + 0.5d, newCol[i]); // New column holds the mapped values
+            }
+        }
     }
 }