dotnet · tannergooding · May 26, 2020 · May 13, 2020 · May 13, 2020 · May 13, 2020
diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT
@@ -821,6 +821,32 @@ under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 CONDITIONS OF ANY KIND, either express or implied. See the License for the
 specific language governing permissions and limitations under the License.
 
+License notice for DirectX Math Library
+---------------------------------------
+
+https://github.com/microsoft/DirectXMath/blob/master/LICENSE
+
+ The MIT License (MIT)
+
+Copyright (c) 2011-2020 Microsoft Corp
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this
+software and associated documentation files (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use, copy, modify,
+merge, publish, distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be included in all copies
+or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
+CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
+OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
 License notice for ldap4net
 ---------------------------
 
@@ -833,3 +859,4 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/src/libraries/System.Numerics.Vectors/tests/Matrix4x4Tests.cs b/src/libraries/System.Numerics.Vectors/tests/Matrix4x4Tests.cs
@@ -219,6 +219,23 @@ public void Matrix4x4InvertAffineTest()
  Assert.True(MathHelper.Equal(i, Matrix4x4.Identity));
  }
 
+ // A test for Invert (Matrix4x4)
+ [Fact]
+ public void Matrix4x4InvertRank3()
+ {
+ // A 4x4 Matrix having a rank of 3
+ Matrix4x4 mtx = new Matrix4x4(1.0f, 2.0f, 3.0f, 0.0f,
+ 5.0f, 1.0f, 6.0f, 0.0f,
+ 8.0f, 9.0f, 1.0f, 0.0f,
+ 4.0f, 7.0f, 3.0f, 0.0f);
+
+ Matrix4x4 actual;
+ Assert.False(Matrix4x4.Invert(mtx, out actual));
+
+ Matrix4x4 i = mtx * actual;
+ Assert.False(MathHelper.Equal(i, Matrix4x4.Identity));
+ }
+
  void DecomposeTest(float yaw, float pitch, float roll, Vector3 expectedTranslation, Vector3 expectedScales)
  {
  Quaternion expectedRotation = Quaternion.CreateFromYawPitchRoll(MathHelper.ToRadians(yaw),

diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/Matrix4x4.cs
@@ -1,8 +1,10 @@
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
-
+using Internal.Runtime.CompilerServices;
+using System.Diagnostics;
 using System.Globalization;
+using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
@@ -1305,13 +1307,204 @@ public readonly float GetDeterminant()
  d * (e * jo_kn - f * io_km + g * in_jm);
  }
 
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static Vector128<float> Permute(Vector128<float> value, byte control)
+ {
+ if (Avx.IsSupported)
+ {
+ return Avx.Permute(value, control);
+ }
+
+ Debug.Assert(Sse.IsSupported);
+ return Sse.Shuffle(value, value, control);
+ }
+
  /// <summary>
  /// Attempts to calculate the inverse of the given matrix. If successful, result will contain the inverted matrix.
  /// </summary>
  /// <param name="matrix">The source matrix to invert.</param>
  /// <param name="result">If successful, contains the inverted matrix.</param>
  /// <returns>True if the source matrix could be inverted; False otherwise.</returns>
- public static bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static unsafe bool Invert(Matrix4x4 matrix, out Matrix4x4 result)
+ {
+ if (Sse.IsSupported)
+ {
+ return SseImpl(matrix, out result);
+ }
+
+ return SoftwareFallback(matrix, out result);
+ }
+
+
+
+ private static unsafe bool SseImpl(Matrix4x4 matrix, out Matrix4x4 result)
+ {
+ // This implementation is based on the DirectX Math Library XMMInverse method
+ // https://github.com/microsoft/DirectXMath/blob/master/Inc/DirectXMathMatrix.inl
+
+ // Load the matrix values into rows
+ Vector128<float> row1 = Sse.LoadVector128(&matrix.M11);
+ Vector128<float> row2 = Sse.LoadVector128(&matrix.M21);
+ Vector128<float> row3 = Sse.LoadVector128(&matrix.M31);
+ Vector128<float> row4 = Sse.LoadVector128(&matrix.M41);
+
+ // Transpose the matrix
+ Vector128<float> vTemp1 = Sse.Shuffle(row1, row2, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
+ Vector128<float> vTemp3 = Sse.Shuffle(row1, row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+ Vector128<float> vTemp2 = Sse.Shuffle(row3, row4, 0x44); //_MM_SHUFFLE(1, 0, 1, 0)
+ Vector128<float> vTemp4 = Sse.Shuffle(row3, row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+
+ row1 = Sse.Shuffle(vTemp1, vTemp2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+ row2 = Sse.Shuffle(vTemp1, vTemp2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+ row3 = Sse.Shuffle(vTemp3, vTemp4, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+ row4 = Sse.Shuffle(vTemp3, vTemp4, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+
+ Vector128<float> V00 = Permute(row3, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
+ Vector128<float> V10 = Permute(row4, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+ Vector128<float> V01 = Permute(row1, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
+ Vector128<float> V11 = Permute(row2, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+ Vector128<float> V02 = Sse.Shuffle(row3, row1, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+ Vector128<float> V12 = Sse.Shuffle(row4, row2, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+
+ Vector128<float> D0 = Sse.Multiply(V00, V10);
+ Vector128<float> D1 = Sse.Multiply(V01, V11);
+ Vector128<float> D2 = Sse.Multiply(V02, V12);
+
+ V00 = Permute(row3, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+ V10 = Permute(row4, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
+ V01 = Permute(row1, 0xEE); //_MM_SHUFFLE(3, 2, 3, 2)
+ V11 = Permute(row2, 0x50); //_MM_SHUFFLE(1, 1, 0, 0)
+ V02 = Sse.Shuffle(row3, row1, 0xDD); //_MM_SHUFFLE(3, 1, 3, 1)
+ V12 = Sse.Shuffle(row4, row2, 0x88); //_MM_SHUFFLE(2, 0, 2, 0)
+
+ // Note: We use this expansion pattern instead of Fused Multiply Add
+ // in order to support older hardware
+ D0 = Sse.Subtract(D0, Sse.Multiply(V00, V10));
+ D1 = Sse.Subtract(D1, Sse.Multiply(V01, V11));
+ D2 = Sse.Subtract(D2, Sse.Multiply(V02, V12));
+
+ // V11 = D0Y,D0W,D2Y,D2Y
+ V11 = Sse.Shuffle(D0, D2, 0x5D); //_MM_SHUFFLE(1, 1, 3, 1)
+ V00 = Permute(row2, 0x49); //_MM_SHUFFLE(1, 0, 2, 1)
+ V10 = Sse.Shuffle(V11, D0, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
+ V01 = Permute(row1, 0x12); //_MM_SHUFFLE(0, 1, 0, 2)
+ V11 = Sse.Shuffle(V11, D0, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)
+
+ // V13 = D1Y,D1W,D2W,D2W
+ Vector128<float> V13 = Sse.Shuffle(D1, D2, 0xFD); //_MM_SHUFFLE(3, 3, 3, 1)
+ V02 = Permute(row4, 0x49); //_MM_SHUFFLE(1, 0, 2, 1)
+ V12 = Sse.Shuffle(V13, D1, 0x32); //_MM_SHUFFLE(0, 3, 0, 2)
+ Vector128<float> V03 = Permute(row3, 0x12); //_MM_SHUFFLE(0, 1, 0, 2)
+ V13 = Sse.Shuffle(V13, D1, 0x99); //_MM_SHUFFLE(2, 1, 2, 1)
+
+ Vector128<float> C0 = Sse.Multiply(V00, V10);
+ Vector128<float> C2 = Sse.Multiply(V01, V11);
+ Vector128<float> C4 = Sse.Multiply(V02, V12);
+ Vector128<float> C6 = Sse.Multiply(V03, V13);
+
+ // V11 = D0X,D0Y,D2X,D2X
+ V11 = Sse.Shuffle(D0, D2, 0x4); //_MM_SHUFFLE(0, 0, 1, 0)
+ V00 = Permute(row2, 0x9e); //_MM_SHUFFLE(2, 1, 3, 2)
+ V10 = Sse.Shuffle(D0, V11, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
+ V01 = Permute(row1, 0x7b); //_MM_SHUFFLE(1, 3, 2, 3)
+ V11 = Sse.Shuffle(D0, V11, 0x26); //_MM_SHUFFLE(0, 2, 1, 2)
+
+ // V13 = D1X,D1Y,D2Z,D2Z
+ V13 = Sse.Shuffle(D1, D2, 0xa4); //_MM_SHUFFLE(2, 2, 1, 0)
+ V02 = Permute(row4, 0x9e); //_MM_SHUFFLE(2, 1, 3, 2)
+ V12 = Sse.Shuffle(D1, V13, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
+ V03 = Permute(row3, 0x7b); //_MM_SHUFFLE(1, 3, 2, 3)
+ V13 = Sse.Shuffle(D1, V13, 0x26); //_MM_SHUFFLE(0, 2, 1, 2)
+
+ C0 = Sse.Subtract(C0, Sse.Multiply(V00, V10));
+ C2 = Sse.Subtract(C2, Sse.Multiply(V01, V11));
+ C4 = Sse.Subtract(C4, Sse.Multiply(V02, V12));
+ C6 = Sse.Subtract(C6, Sse.Multiply(V03, V13));
+
+ V00 = Permute(row2, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)
+
+ // V10 = D0Z,D0Z,D2X,D2Y
+ V10 = Sse.Shuffle(D0, D2, 0x4A); //_MM_SHUFFLE(1, 0, 2, 2)
+ V10 = Permute(V10, 0x2C); //_MM_SHUFFLE(0, 2, 3, 0)
+ V01 = Permute(row1, 0x8D); //_MM_SHUFFLE(2, 0, 3, 1)
+
+ // V11 = D0X,D0W,D2X,D2Y
+ V11 = Sse.Shuffle(D0, D2, 0x4C); //_MM_SHUFFLE(1, 0, 3, 0)
+ V11 = Permute(V11, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
+ V02 = Permute(row4, 0x33); //_MM_SHUFFLE(0, 3, 0, 3)
+
+ // V12 = D1Z,D1Z,D2Z,D2W
+ V12 = Sse.Shuffle(D1, D2, 0xEA); //_MM_SHUFFLE(3, 2, 2, 2)
+ V12 = Permute(V12, 0x2C); //_MM_SHUFFLE(0, 2, 3, 0)
+ V03 = Permute(row3, 0x8D); //_MM_SHUFFLE(2, 0, 3, 1)
+
+ // V13 = D1X,D1W,D2Z,D2W
+ V13 = Sse.Shuffle(D1, D2, 0xEC); //_MM_SHUFFLE(3, 2, 3, 0)
+ V13 = Permute(V13, 0x93); //_MM_SHUFFLE(2, 1, 0, 3)
+
+ V00 = Sse.Multiply(V00, V10);
+ V01 = Sse.Multiply(V01, V11);
+ V02 = Sse.Multiply(V02, V12);
+ V03 = Sse.Multiply(V03, V13);
+
+ Vector128<float> C1 = Sse.Subtract(C0, V00);
+ C0 = Sse.Add(C0, V00);
+ Vector128<float> C3 = Sse.Add(C2, V01);
+ C2 = Sse.Subtract(C2, V01);
+ Vector128<float> C5 = Sse.Subtract(C4, V02);
+ C4 = Sse.Add(C4, V02);
+ Vector128<float> C7 = Sse.Add(C6, V03);
+ C6 = Sse.Subtract(C6, V03);
+
+ C0 = Sse.Shuffle(C0, C1, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C2 = Sse.Shuffle(C2, C3, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C4 = Sse.Shuffle(C4, C5, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C6 = Sse.Shuffle(C6, C7, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+
+ C0 = Permute(C0, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C2 = Permute(C2, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C4 = Permute(C4, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+ C6 = Permute(C6, 0xD8); //_MM_SHUFFLE(3, 1, 2, 0)
+
+ // Get the determinant
+ vTemp2 = row1;
+ float det = Vector4.Dot(C0.AsVector4(), vTemp2.AsVector4());
+
+ // Check determinate is not zero
+ if (MathF.Abs(det) < float.Epsilon)
+ {
+ result = new Matrix4x4(float.NaN, float.NaN, float.NaN, float.NaN,
+ float.NaN, float.NaN, float.NaN, float.NaN,
+ float.NaN, float.NaN, float.NaN, float.NaN,
+ float.NaN, float.NaN, float.NaN, float.NaN);
+ return false;
+ }
+
+ // Create Vector128<float> copy of the determinant and invert them.
+ Vector128<float> ones = Vector128.Create(1.0f, 1.0f, 1.0f, 1.0f);
- Vector128<float> ones = Vector128.Create(1.0f, 1.0f, 1.0f, 1.0f);
+ Vector128<float> ones = Vector128.Create(1.0f);
- Vector128<float> ones = Vector128.Create(1.0f, 1.0f, 1.0f, 1.0f);
+ Vector128<float> ones = Vector128.Create(1.0f);
+ Vector128<float> vTemp = Vector128.Create(det, det, det, det);
+ vTemp = Sse.Divide(ones, vTemp);
+
+ row1 = Sse.Multiply(C0, vTemp);
+ row2 = Sse.Multiply(C2, vTemp);
+ row3 = Sse.Multiply(C4, vTemp);
+ row4 = Sse.Multiply(C6, vTemp);
+
+ Unsafe.SkipInit(out result);
+ ref Vector128<float> vResult = ref Unsafe.As<Matrix4x4, Vector128<float>>(ref result);
+
+ vResult = row1;
+ Unsafe.Add(ref vResult, 1) = row2;
+ Unsafe.Add(ref vResult, 2) = row3;
+ Unsafe.Add(ref vResult, 3) = row4;
+
+ return true;
+ }
+
+ private static bool SoftwareFallback(Matrix4x4 matrix, out Matrix4x4 result)
  {
  // -1
  // If you have matrix M, inverse Matrix M can compute