Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SIMD acceleration for Matrix4x4.Invert #34394 #36323

Merged
merged 11 commits into from
May 26, 2020
27 changes: 27 additions & 0 deletions THIRD-PARTY-NOTICES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,32 @@ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.

License notice for DirectX Math Library
---------------------------------------

https://github.com/microsoft/DirectXMath/blob/master/LICENSE

The MIT License (MIT)

Copyright (c) 2011-2020 Microsoft Corp

Permission is hereby granted, free of charge, to any person obtaining a copy of this
software and associated documentation files (the "Software"), to deal in the Software
without restriction, including without limitation the rights to use, copy, modify,
merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be included in all copies
or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

License notice for ldap4net
---------------------------

Expand All @@ -833,3 +859,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

17 changes: 17 additions & 0 deletions src/libraries/System.Numerics.Vectors/tests/Matrix4x4Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,23 @@ public void Matrix4x4InvertAffineTest()
Assert.True(MathHelper.Equal(i, Matrix4x4.Identity));
}

// A test for Invert (Matrix4x4)
[Fact]
public void Matrix4x4InvertRank3()
{
// A 4x4 Matrix having a rank of 3
Matrix4x4 mtx = new Matrix4x4(1.0f, 2.0f, 3.0f, 0.0f,
5.0f, 1.0f, 6.0f, 0.0f,
8.0f, 9.0f, 1.0f, 0.0f,
4.0f, 7.0f, 3.0f, 0.0f);

Matrix4x4 actual;
Assert.False(Matrix4x4.Invert(mtx, out actual));

Matrix4x4 i = mtx * actual;
Assert.False(MathHelper.Equal(i, Matrix4x4.Identity));
}

void DecomposeTest(float yaw, float pitch, float roll, Vector3 expectedTranslation, Vector3 expectedScales)
{
Quaternion expectedRotation = Quaternion.CreateFromYawPitchRoll(MathHelper.ToRadians(yaw),
Expand Down
197 changes: 195 additions & 2 deletions src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using Internal.Runtime.CompilerServices;
using System.Diagnostics;
using System.Globalization;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
Expand Down Expand Up @@ -1305,13 +1307,204 @@ public readonly float GetDeterminant()
d * (e * jo_kn - f * io_km + g * in_jm);
}


[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<float> Permute(Vector128<float> value, byte control)
{
if (Avx.IsSupported)
{
return Avx.Permute(value, control);
}

Debug.Assert(Sse.IsSupported);
return Sse.Shuffle(value, value, control);
}

/// <summary>
/// Attempts to calculate the inverse of the given matrix. If successful, result will contain the inverted matrix.
/// </summary>
/// <param name="matrix">The source matrix to invert.</param>
/// <param name="result">If successful, contains the inverted matrix.</param>
/// <returns>True if the source matrix could be inverted; False otherwise.</returns>
public static bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
{
if (Sse.IsSupported)
tannergooding marked this conversation as resolved.
Show resolved Hide resolved
eanova marked this conversation as resolved.
Show resolved Hide resolved
{
return SseImpl(matrix, out result);
}

return SoftwareFallback(matrix, out result);
}



private static unsafe bool SseImpl(Matrix4x4 matrix, out Matrix4x4 result)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This and the other should still be made static local functions as I called out here: #36323 (comment)

Otherwise they may conflict with other methods we also accelerate.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Everything thing else LGTM and I believe it is otherwise ready to merge

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, wasn't familiar with local functions. I thought you were pointing out the public modifier instead. Fixed!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No worries, I should have clarified 😄

{
// This implementation is based on the DirectX Math Library XMMInverse method
// https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl

// Load the matrix values into rows
Vector128<float> row1 = Sse.LoadVector128(&matrix.M11);
Vector128<float> row2 = Sse.LoadVector128(&matrix.M21);
Vector128<float> row3 = Sse.LoadVector128(&matrix.M31);
Vector128<float> row4 = Sse.LoadVector128(&matrix.M41);

// Transpose the matrix
Vector128<float> vTemp1 = Sse.Shuffle(row1, row2, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
Vector128<float> vTemp3 = Sse.Shuffle(row1, row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
Vector128<float> vTemp2 = Sse.Shuffle(row3, row4, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
Vector128<float> vTemp4 = Sse.Shuffle(row3, row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)

row1 = Sse.Shuffle(vTemp1, vTemp2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
row2 = Sse.Shuffle(vTemp1, vTemp2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
row3 = Sse.Shuffle(vTemp3, vTemp4, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
row4 = Sse.Shuffle(vTemp3, vTemp4, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)

Vector128<float> V00 = Permute(row3, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
Vector128<float> V10 = Permute(row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
Vector128<float> V01 = Permute(row1, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
Vector128<float> V11 = Permute(row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
Vector128<float> V02 = Sse.Shuffle(row3, row1, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
Vector128<float> V12 = Sse.Shuffle(row4, row2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)

Vector128<float> D0 = Sse.Multiply(V00, V10);
Vector128<float> D1 = Sse.Multiply(V01, V11);
Vector128<float> D2 = Sse.Multiply(V02, V12);

V00 = Permute(row3, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
V10 = Permute(row4, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
V01 = Permute(row1, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
V11 = Permute(row2, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
V02 = Sse.Shuffle(row3, row1, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
V12 = Sse.Shuffle(row4, row2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)

// Note: We use this expansion pattern instead of Fused Multiply Add
// in order to support older hardware
D0 = Sse.Subtract(D0, Sse.Multiply(V00, V10));
D1 = Sse.Subtract(D1, Sse.Multiply(V01, V11));
D2 = Sse.Subtract(D2, Sse.Multiply(V02, V12));

// V11 = D0Y,D0W,D2Y,D2Y
V11 = Sse.Shuffle(D0, D2, 0x5D); //_MM_SHUFFLE(1, 1, 3, 1)
V00 = Permute(row2, 0x49); //_MM_SHUFFLE(1, 0, 2, 1)
V10 = Sse.Shuffle(V11, D0, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
V01 = Permute(row1, 0x12); //_MM_SHUFFLE(0, 1, 0, 2)
V11 = Sse.Shuffle(V11, D0, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)

// V13 = D1Y,D1W,D2W,D2W
Vector128<float> V13 = Sse.Shuffle(D1, D2, 0xFD); //_MM_SHUFFLE(3, 3, 3, 1)
V02 = Permute(row4, 0x49); //_MM_SHUFFLE(1, 0, 2, 1)
V12 = Sse.Shuffle(V13, D1, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
Vector128<float> V03 = Permute(row3, 0x12); //_MM_SHUFFLE(0, 1, 0, 2)
V13 = Sse.Shuffle(V13, D1, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)

Vector128<float> C0 = Sse.Multiply(V00, V10);
Vector128<float> C2 = Sse.Multiply(V01, V11);
Vector128<float> C4 = Sse.Multiply(V02, V12);
Vector128<float> C6 = Sse.Multiply(V03, V13);

// V11 = D0X,D0Y,D2X,D2X
V11 = Sse.Shuffle(D0, D2, 0x4); //_MM_SHUFFLE(0, 0, 1, 0)
V00 = Permute(row2, 0x9e); //_MM_SHUFFLE(2, 1, 3, 2)
V10 = Sse.Shuffle(D0, V11, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
V01 = Permute(row1, 0x7b); //_MM_SHUFFLE(1, 3, 2, 3)
V11 = Sse.Shuffle(D0, V11, 0x26); //_MM_SHUFFLE(0, 2, 1, 2)

// V13 = D1X,D1Y,D2Z,D2Z
V13 = Sse.Shuffle(D1, D2, 0xa4); //_MM_SHUFFLE(2, 2, 1, 0)
V02 = Permute(row4, 0x9e); //_MM_SHUFFLE(2, 1, 3, 2)
V12 = Sse.Shuffle(D1, V13, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
V03 = Permute(row3, 0x7b); //_MM_SHUFFLE(1, 3, 2, 3)
V13 = Sse.Shuffle(D1, V13, 0x26); //_MM_SHUFFLE(0, 2, 1, 2)

C0 = Sse.Subtract(C0, Sse.Multiply(V00, V10));
C2 = Sse.Subtract(C2, Sse.Multiply(V01, V11));
C4 = Sse.Subtract(C4, Sse.Multiply(V02, V12));
C6 = Sse.Subtract(C6, Sse.Multiply(V03, V13));

V00 = Permute(row2, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)

// V10 = D0Z,D0Z,D2X,D2Y
V10 = Sse.Shuffle(D0, D2, 0x4A); //_MM_SHUFFLE(1, 0, 2, 2)
V10 = Permute(V10, 0x2C); //_MM_SHUFFLE(0, 2, 3, 0)
V01 = Permute(row1, 0x8D); //_MM_SHUFFLE(2, 0, 3, 1)

// V11 = D0X,D0W,D2X,D2Y
V11 = Sse.Shuffle(D0, D2, 0x4C); //_MM_SHUFFLE(1, 0, 3, 0)
V11 = Permute(V11, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
V02 = Permute(row4, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)

// V12 = D1Z,D1Z,D2Z,D2W
V12 = Sse.Shuffle(D1, D2, 0xEA); //_MM_SHUFFLE(3, 2, 2, 2)
V12 = Permute(V12, 0x2C); //_MM_SHUFFLE(0, 2, 3, 0)
V03 = Permute(row3, 0x8D); //_MM_SHUFFLE(2, 0, 3, 1)

// V13 = D1X,D1W,D2Z,D2W
V13 = Sse.Shuffle(D1, D2, 0xEC); //_MM_SHUFFLE(3, 2, 3, 0)
V13 = Permute(V13, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)

V00 = Sse.Multiply(V00, V10);
V01 = Sse.Multiply(V01, V11);
V02 = Sse.Multiply(V02, V12);
V03 = Sse.Multiply(V03, V13);

Vector128<float> C1 = Sse.Subtract(C0, V00);
C0 = Sse.Add(C0, V00);
Vector128<float> C3 = Sse.Add(C2, V01);
C2 = Sse.Subtract(C2, V01);
Vector128<float> C5 = Sse.Subtract(C4, V02);
C4 = Sse.Add(C4, V02);
Vector128<float> C7 = Sse.Add(C6, V03);
C6 = Sse.Subtract(C6, V03);

C0 = Sse.Shuffle(C0, C1, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C2 = Sse.Shuffle(C2, C3, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C4 = Sse.Shuffle(C4, C5, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C6 = Sse.Shuffle(C6, C7, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)

C0 = Permute(C0, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C2 = Permute(C2, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C4 = Permute(C4, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
C6 = Permute(C6, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)

// Get the determinant
vTemp2 = row1;
float det = Vector4.Dot(C0.AsVector4(), vTemp2.AsVector4());

// Check determinate is not zero
if (MathF.Abs(det) < float.Epsilon)
{
result = new Matrix4x4(float.NaN, float.NaN, float.NaN, float.NaN,
float.NaN, float.NaN, float.NaN, float.NaN,
float.NaN, float.NaN, float.NaN, float.NaN,
float.NaN, float.NaN, float.NaN, float.NaN);
return false;
}

// Create Vector128<float> copy of the determinant and invert them.
Vector128<float> ones = Vector128.Create(1.0f, 1.0f, 1.0f, 1.0f);
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Vector128<float> ones = Vector128.Create(1.0f, 1.0f, 1.0f, 1.0f);
Vector128<float> ones = Vector128.Create(1.0f);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks!

Vector128<float> vTemp = Vector128.Create(det, det, det, det);
eanova marked this conversation as resolved.
Show resolved Hide resolved
vTemp = Sse.Divide(ones, vTemp);

row1 = Sse.Multiply(C0, vTemp);
row2 = Sse.Multiply(C2, vTemp);
row3 = Sse.Multiply(C4, vTemp);
row4 = Sse.Multiply(C6, vTemp);

Unsafe.SkipInit(out result);
ref Vector128<float> vResult = ref Unsafe.As<Matrix4x4, Vector128<float>>(ref result);

vResult = row1;
Unsafe.Add(ref vResult, 1) = row2;
Unsafe.Add(ref vResult, 2) = row3;
Unsafe.Add(ref vResult, 3) = row4;

return true;
}

private static bool SoftwareFallback(Matrix4x4 matrix, out Matrix4x4 result)
{
// -1
// If you have matrix M, inverse Matrix M can compute
Expand Down