Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Increase performance of arithmetic operations by enhancing calculations on nullable values #6846

Merged
merged 55 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from 53 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
eb2230d
Optimize PrimitiveColumnContainer.Clone method
asmirnov82 Sep 8, 2023
67af276
Avoid unnecessary type conversion during binary operations
asmirnov82 Sep 8, 2023
cbc7c4d
Remove using
asmirnov82 Sep 8, 2023
63d983a
Fix DataFrameBuffer constructor
asmirnov82 Sep 9, 2023
6abf02e
remove uncorrectly added using
asmirnov82 Sep 9, 2023
1a47ce4
Make DataFrameBuffer Length field protected
asmirnov82 Sep 10, 2023
a70f986
Add performance tests
asmirnov82 Sep 12, 2023
4e0cf30
Split Test for AppendMany into 4 different tests
asmirnov82 Sep 13, 2023
cb236bf
Block init of null validity buffer instead of setting individual bits
asmirnov82 Sep 13, 2023
095cb83
Add unit tests for PrimitiveDataFrameColumn.Clone
asmirnov82 Sep 13, 2023
0f78092
Fixes #6821
asmirnov82 Sep 13, 2023
f5e9aaa
Fix
asmirnov82 Sep 13, 2023
69fc3d7
Add extra tests
asmirnov82 Sep 14, 2023
2789c7b
Fix
asmirnov82 Sep 14, 2023
d3944d7
Fix typo
asmirnov82 Sep 14, 2023
8edbae4
Fix Divide_Int16 and Divide_Int32_Int16 benchmarks
asmirnov82 Sep 15, 2023
ec06bd8
Fix
asmirnov82 Sep 15, 2023
1ead12f
Merge branch 'dataframe_primitivecolumncontainer_refactoring' into 68…
asmirnov82 Sep 15, 2023
78e4a64
Merge branch 'dataframe_performance_tests' into 6825_enhance_nullable…
asmirnov82 Sep 15, 2023
b351cf1
Avoid using constructor, that copies memory
asmirnov82 Sep 15, 2023
94bca4e
First step of tt refactoring
asmirnov82 Sep 15, 2023
2366da2
Step 2
asmirnov82 Sep 15, 2023
2a9146e
Step 3
asmirnov82 Sep 18, 2023
f4a371b
Move iteration over buffers outside of the PrimitiveDataFrameColumnAr…
asmirnov82 Sep 18, 2023
1788c5f
Merge remote-tracking branch 'origin/dataframe_arithmetic_refactoring…
asmirnov82 Sep 18, 2023
34c850f
Change PrimitiveDataFrameColumnArithmetic
asmirnov82 Sep 18, 2023
2200642
Merge remote-tracking branch 'origin/main' into dataframe_arithmetic_…
asmirnov82 Sep 18, 2023
c44c628
Merge remote-tracking branch 'origin/main' into dataframe_primitiveco…
asmirnov82 Sep 18, 2023
0724599
Merge remote-tracking branch 'origin/main' into reduce_number_of_copies
asmirnov82 Sep 18, 2023
be47f8d
Fix typo
asmirnov82 Sep 19, 2023
d1b0686
Use RawSpan
asmirnov82 Sep 19, 2023
33a29b0
Fix bug with AppendMany values to not empty column
asmirnov82 Sep 19, 2023
7548dc4
Restart unit tests
asmirnov82 Sep 19, 2023
b80e8a0
Add more unit tests
asmirnov82 Sep 20, 2023
1ab3886
Merge branch 'dataframe_arithmetic_refactoring' into 6825_enhance_nul…
asmirnov82 Sep 20, 2023
7ea90d8
Merge branch 'reduce_number_of_copies' into 6825_enhance_nullable_sup…
asmirnov82 Sep 20, 2023
fa7bfa0
Merge remote-tracking branch 'origin/dataframe_primitivecolumncontain…
asmirnov82 Sep 20, 2023
d36d374
Add GetBitCount method
asmirnov82 Sep 20, 2023
83b5cca
Fix failing unit test
asmirnov82 Sep 20, 2023
fb7a144
Merge remote-tracking branch 'origin/dataframe_primitivecolumncontain…
asmirnov82 Sep 20, 2023
f551964
Implementation
asmirnov82 Sep 20, 2023
7c0d034
Change unit tests
asmirnov82 Sep 20, 2023
65ed342
Update unit tests
asmirnov82 Sep 21, 2023
fadb85e
Refactoring BinaryOperation
asmirnov82 Sep 25, 2023
9a3cd79
Intermediate changes
asmirnov82 Sep 25, 2023
2b25770
Intermediate results
asmirnov82 Sep 27, 2023
59ddfe4
Implement Binary Scalar Reverse Operarions
asmirnov82 Sep 28, 2023
f0cb67e
Merge remote-tracking branch 'origin/main' into 6825_increase_perform…
asmirnov82 Sep 28, 2023
aca4bef
Add implementation for BinaryIntOperations
asmirnov82 Sep 28, 2023
46d5cba
Implement Comparison Operations
asmirnov82 Sep 29, 2023
b94a96f
Implement actual calculations for Comparison operations
asmirnov82 Sep 29, 2023
d9f1b11
Uncomment performance tests
asmirnov82 Sep 29, 2023
bad13e1
Remove unintentional code changes
asmirnov82 Sep 29, 2023
f2b80a1
Add reference to Apache Arrow project license in THIRD-PARTY-NOTICES
asmirnov82 Oct 2, 2023
5440b74
Fix license issues
asmirnov82 Oct 2, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions src/Microsoft.Data.Analysis/ColumnArithmetic.OperationEnums.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@

// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// Generated from ColumnArithmetic.OperationEnums.tt. Do not modify directly
namespace Microsoft.Data.Analysis
{
/// <summary>
/// Operation selector for binary column arithmetic in which both operands are spans of values.
/// </summary>
/// <remarks>
/// Passed as the <c>operation</c> argument of the
/// <c>IPrimitiveDataFrameColumnArithmetic{T}.HandleOperation</c> overload that takes a left and a
/// right value span together with their validity spans.
/// This file is generated from ColumnArithmetic.OperationEnums.tt: one member is emitted for each
/// <c>methodConfiguration</c> entry whose <c>MethodType</c> is <c>Binary</c>, so change the template
/// inputs instead of editing this list by hand.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// The member list is currently identical to that of <c>BinaryScalarOperation</c>.
/// </remarks>
internal enum BinaryOperation
{
Add,
Subtract,
Multiply,
Divide,
Modulo,
And,
Or,
Xor,
}

/// <summary>
/// Operation selector for binary column arithmetic in which one operand is a span of values and
/// the other is a single value of the element type.
/// </summary>
/// <remarks>
/// Passed as the <c>operation</c> argument of the two
/// <c>IPrimitiveDataFrameColumnArithmetic{T}.HandleOperation</c> overloads that accept this enum:
/// one with the span on the left and the scalar on the right, and one with the scalar on the left
/// and the span (plus its validity span) on the right.
/// This file is generated from ColumnArithmetic.OperationEnums.tt: one member is emitted for each
/// <c>methodConfiguration</c> entry whose <c>MethodType</c> is <c>BinaryScalar</c>, so change the
/// template inputs instead of editing this list by hand.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// </remarks>
internal enum BinaryScalarOperation
{
Add,
Subtract,
Multiply,
Divide,
Modulo,
And,
Or,
Xor,
}

/// <summary>
/// Operation selector for column operations whose right-hand operand is an <see cref="int"/>.
/// </summary>
/// <remarks>
/// Passed as the <c>operation</c> argument of the
/// <c>IPrimitiveDataFrameColumnArithmetic{T}.HandleOperation</c> overload that takes a value span
/// on the left and an <c>int</c> on the right.
/// This file is generated from ColumnArithmetic.OperationEnums.tt: one member is emitted for each
/// <c>methodConfiguration</c> entry whose <c>MethodType</c> is <c>BinaryInt</c>, so change the
/// template inputs instead of editing this list by hand.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// </remarks>
internal enum BinaryIntOperation
{
LeftShift,
RightShift,
}

/// <summary>
/// Operation selector for element-wise comparisons in which both operands are spans of values.
/// </summary>
/// <remarks>
/// Passed as the <c>operation</c> argument of the
/// <c>IPrimitiveDataFrameColumnArithmetic{T}.HandleOperation</c> overload that takes two read-only
/// value spans, a <c>PrimitiveColumnContainer{bool}</c> and an offset.
/// This file is generated from ColumnArithmetic.OperationEnums.tt: one member is emitted for each
/// <c>methodConfiguration</c> entry whose <c>MethodType</c> is <c>Comparison</c>, so change the
/// template inputs instead of editing this list by hand.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// The member list is currently identical to that of <c>ComparisonScalarOperation</c>.
/// </remarks>
internal enum ComparisonOperation
{
ElementwiseEquals,
ElementwiseNotEquals,
ElementwiseGreaterThanOrEqual,
ElementwiseLessThanOrEqual,
ElementwiseGreaterThan,
ElementwiseLessThan,
}

/// <summary>
/// Operation selector for element-wise comparisons between a span of values and a single value
/// of the element type.
/// </summary>
/// <remarks>
/// Passed as the <c>operation</c> argument of the
/// <c>IPrimitiveDataFrameColumnArithmetic{T}.HandleOperation</c> overload that takes a read-only
/// value span on the left, a scalar on the right, a <c>PrimitiveColumnContainer{bool}</c> and an
/// offset.
/// This file is generated from ColumnArithmetic.OperationEnums.tt: one member is emitted for each
/// <c>methodConfiguration</c> entry whose <c>MethodType</c> is <c>ComparisonScalar</c>, so change
/// the template inputs instead of editing this list by hand.
/// Members carry no explicit values, so their numeric values follow declaration order.
/// </remarks>
internal enum ComparisonScalarOperation
{
ElementwiseEquals,
ElementwiseNotEquals,
ElementwiseGreaterThanOrEqual,
ElementwiseLessThanOrEqual,
ElementwiseGreaterThan,
ElementwiseLessThan,
}
}
59 changes: 59 additions & 0 deletions src/Microsoft.Data.Analysis/ColumnArithmetic.OperationEnums.tt
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
<#@ template debug="false" hostspecific="false" language="C#" #>
<#@ assembly name="System.Core" #>
<#@ import namespace="System.Linq" #>
<#@ import namespace="System.Text" #>
<#@ import namespace="System.Collections.Generic" #>
<#@ output extension=".cs" #>
<#@ include file="ColumnArithmeticTemplate.ttinclude" #>
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

// Generated from ColumnArithmetic.OperationEnums.tt. Do not modify directly
namespace Microsoft.Data.Analysis
{
internal enum BinaryOperation
{
<# foreach (MethodConfiguration method in methodConfiguration) { #>
<# if (method.MethodType == MethodType.Binary) { #>
<#=method.MethodName#>,
<# } #>
<# } #>
}

internal enum BinaryScalarOperation
{
<# foreach (MethodConfiguration method in methodConfiguration) { #>
<# if (method.MethodType == MethodType.BinaryScalar) { #>
<#=method.MethodName#>,
<# } #>
<# } #>
}

internal enum BinaryIntOperation
{
<# foreach (MethodConfiguration method in methodConfiguration) { #>
<# if (method.MethodType == MethodType.BinaryInt) { #>
<#=method.MethodName#>,
<# } #>
<# } #>
}

internal enum ComparisonOperation
{
<# foreach (MethodConfiguration method in methodConfiguration) { #>
<# if (method.MethodType == MethodType.Comparison) { #>
<#=method.MethodName#>,
<# } #>
<# } #>
}

internal enum ComparisonScalarOperation
{
<# foreach (MethodConfiguration method in methodConfiguration) { #>
<# if (method.MethodType == MethodType.ComparisonScalar) { #>
<#=method.MethodName#>,
<# } #>
<# } #>
}
}
1 change: 0 additions & 1 deletion src/Microsoft.Data.Analysis/DataFrameBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ internal static DataFrameBuffer<T> GetMutableBuffer(ReadOnlyDataFrameBuffer<T> b
if (mutableBuffer == null)
{
mutableBuffer = new DataFrameBuffer<T>(buffer.ReadOnlyBuffer, buffer.Length);
mutableBuffer.Length = buffer.Length;
}
return mutableBuffer;
}
Expand Down
24 changes: 24 additions & 0 deletions src/Microsoft.Data.Analysis/IPrimitiveDataFrameColumnArithmetic.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.Data.Analysis
{
/// <summary>
/// Contract for type-specific arithmetic and comparison kernels that operate on spans of
/// primitive column values. Every overload is named <c>HandleOperation</c>; the overload is chosen
/// by the operation enum type and the operand shapes (span/span, span/scalar, scalar/span, span/int).
/// </summary>
/// <typeparam name="T">Element type of the value spans; constrained to <c>unmanaged</c>.</typeparam>
/// <remarks>
/// NOTE(review): none of the arithmetic overloads return a value and their destination-side
/// operand is a mutable <see cref="Span{T}"/>, which suggests results are written in place —
/// confirm against the implementations.
/// NOTE(review): the <c>*Validity</c> byte spans are presumably null/validity bitmaps for the
/// corresponding value spans — confirm the bit layout against the column container.
/// </remarks>
internal interface IPrimitiveDataFrameColumnArithmetic<T>
where T : unmanaged
{
/// <summary>
/// Handles a <see cref="BinaryOperation"/> whose operands are both spans.
/// </summary>
/// <param name="operation">Which binary operation to perform.</param>
/// <param name="left">Mutable left-hand values (presumably also the destination — TODO confirm).</param>
/// <param name="leftValidity">Mutable validity bytes associated with <paramref name="left"/>.</param>
/// <param name="right">Read-only right-hand values.</param>
/// <param name="rightValidity">Read-only validity bytes associated with <paramref name="right"/>.</param>
void HandleOperation(BinaryOperation operation, Span<T> left, Span<byte> leftValidity, ReadOnlySpan<T> right, ReadOnlySpan<byte> rightValidity);

/// <summary>
/// Handles a <see cref="ComparisonOperation"/> between two read-only value spans.
/// </summary>
/// <param name="operation">Which comparison to perform.</param>
/// <param name="left">Read-only left-hand values.</param>
/// <param name="right">Read-only right-hand values.</param>
/// <param name="container">Boolean container supplied by the caller (presumably receives the comparison results — TODO confirm).</param>
/// <param name="offset">Position value supplied by the caller (presumably the index in <paramref name="container"/> that corresponds to the first span element — TODO confirm).</param>
void HandleOperation(ComparisonOperation operation, ReadOnlySpan<T> left, ReadOnlySpan<T> right, PrimitiveColumnContainer<bool> container, long offset);
/// <summary>
/// Handles a <see cref="ComparisonScalarOperation"/> between a read-only value span and a single value.
/// </summary>
/// <param name="operation">Which comparison to perform.</param>
/// <param name="left">Read-only left-hand values.</param>
/// <param name="right">Single right-hand value.</param>
/// <param name="container">Boolean container supplied by the caller (presumably receives the comparison results — TODO confirm).</param>
/// <param name="offset">Position value supplied by the caller (presumably the index in <paramref name="container"/> that corresponds to the first span element — TODO confirm).</param>
void HandleOperation(ComparisonScalarOperation operation, ReadOnlySpan<T> left, T right, PrimitiveColumnContainer<bool> container, long offset);

/// <summary>
/// Handles a <see cref="BinaryScalarOperation"/> with the span on the left and the scalar on the right.
/// </summary>
/// <param name="operation">Which binary operation to perform.</param>
/// <param name="left">Mutable left-hand values (presumably also the destination — TODO confirm).</param>
/// <param name="right">Single right-hand value.</param>
void HandleOperation(BinaryScalarOperation operation, Span<T> left, T right);
/// <summary>
/// Handles a <see cref="BinaryScalarOperation"/> with the scalar on the left and the span on the
/// right (the reversed operand order). Unlike the span-on-left overload, this one also receives
/// the validity bytes of the span operand.
/// </summary>
/// <param name="operation">Which binary operation to perform.</param>
/// <param name="left">Single left-hand value.</param>
/// <param name="right">Mutable right-hand values (presumably also the destination — TODO confirm).</param>
/// <param name="rightValidity">Read-only validity bytes associated with <paramref name="right"/>.</param>
void HandleOperation(BinaryScalarOperation operation, T left, Span<T> right, ReadOnlySpan<byte> rightValidity);

/// <summary>
/// Handles a <see cref="BinaryIntOperation"/> with a span on the left and an <see cref="int"/> on the right.
/// </summary>
/// <param name="operation">Which operation to perform.</param>
/// <param name="left">Mutable left-hand values (presumably also the destination — TODO confirm).</param>
/// <param name="right">Integer right-hand operand.</param>
void HandleOperation(BinaryIntOperation operation, Span<T> left, int right);
}
}
36 changes: 9 additions & 27 deletions src/Microsoft.Data.Analysis/Microsoft.Data.Analysis.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@
</ItemGroup>

<ItemGroup>
<None Update="ColumnArithmetic.OperationEnums.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>ColumnArithmetic.OperationEnums.cs</LastGenOutput>
</None>
<None Update="Converters.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>Converters.cs</LastGenOutput>
Expand Down Expand Up @@ -111,10 +115,6 @@
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveDataFrameColumn.Computations.cs</LastGenOutput>
</None>
<None Update="PrimitiveDataFrameColumn.ReversedBinaryOperations.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveDataFrameColumn.ReversedBinaryOperations.cs</LastGenOutput>
</None>
<None Update="PrimitiveDataFrameColumnArithmetic.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveColumnArithmetic.cs</LastGenOutput>
Expand All @@ -123,18 +123,10 @@
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveDataFrameColumnComputations.cs</LastGenOutput>
</None>
<None Update="PrimitiveColumnContainer.BinaryOperations.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveColumnContainer.BinaryOperations.cs</LastGenOutput>
</None>
<None Update="PrimitiveDataFrameColumnArithmetic.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveDataFrameColumnArithmetic.cs</LastGenOutput>
</None>
<None Update="PrimitiveColumnContainer.BinaryOperations.tt">
<Generator>TextTemplatingFileGenerator</Generator>
<LastGenOutput>PrimitiveColumnContainer.BinaryOperations.cs</LastGenOutput>
</None>
</ItemGroup>

<ItemGroup>
Expand All @@ -147,6 +139,11 @@
<AutoGen>True</AutoGen>
<DependentUpon>Converters.tt</DependentUpon>
</Compile>
<Compile Update="ColumnArithmetic.OperationEnums.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>ColumnArithmetic.OperationEnums.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveDataFrameColumn.BinaryOperationImplementations.Exploded.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand Down Expand Up @@ -202,11 +199,6 @@
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveDataFrameColumn.Computations.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveDataFrameColumn.ReversedBinaryOperations.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveDataFrameColumn.ReversedBinaryOperations.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveDataFrameColumnArithmetic.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand All @@ -217,21 +209,11 @@
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveDataFrameColumnComputations.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveColumnContainer.BinaryOperations.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveColumnContainer.BinaryOperations.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveDataFrameColumnArithmetic.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveDataFrameColumnArithmetic.tt</DependentUpon>
</Compile>
<Compile Update="PrimitiveColumnContainer.BinaryOperations.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
<DependentUpon>PrimitiveColumnContainer.BinaryOperations.tt</DependentUpon>
</Compile>
<Compile Update="Strings.Designer.cs">
<DesignTime>True</DesignTime>
<AutoGen>True</AutoGen>
Expand Down
Loading