diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp index 55c50360e31d..b9f318fc46f3 100644 --- a/src/jit/emitxarch.cpp +++ b/src/jit/emitxarch.cpp @@ -267,11 +267,20 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins) bool emitter::TakesVexPrefix(instruction ins) { // special case vzeroupper as it requires 2-byte VEX prefix - // special case sfence and the prefetch instructions as they never take a VEX prefix - if ((ins == INS_vzeroupper) || (ins == INS_sfence) || (ins == INS_prefetcht0) || (ins == INS_prefetcht1) || - (ins == INS_prefetcht2) || (ins == INS_prefetchnta)) + // special case (l|m|s)fence and the prefetch instructions as they never take a VEX prefix + switch (ins) { - return false; + case INS_lfence: + case INS_mfence: + case INS_prefetchnta: + case INS_prefetcht0: + case INS_prefetcht1: + case INS_prefetcht2: + case INS_sfence: + case INS_vzeroupper: + return false; + default: + break; } return IsAVXInstruction(ins); @@ -2474,7 +2483,7 @@ void emitter::emitIns(instruction ins) ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp #ifndef LEGACY_BACKEND // These instructions take zero operands - || ins == INS_vzeroupper || ins == INS_sfence + || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence #endif ); diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index e21def9446d6..597a72b020c6 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -744,7 +744,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) regNumber targetReg = node->gtRegNum; var_types targetType = node->TypeGet(); var_types baseType = node->gtSIMDBaseType; - instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); + instruction ins = INS_invalid; regNumber op1Reg = REG_NA; regNumber op2Reg = REG_NA; emitter* emit = getEmitter(); @@ -765,6 +765,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) assert(op2 != nullptr); assert(baseType == TYP_DOUBLE); + ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); op2Reg = op2->gtRegNum; ival = Compiler::ivalOfHWIntrinsic(intrinsicID); emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival); @@ -772,11 +773,29 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) break; } + case NI_SSE2_LoadFence: + { + assert(baseType == TYP_VOID); + assert(op1 == nullptr); + assert(op2 == nullptr); + emit->emitIns(INS_lfence); + break; + } + case NI_SSE2_MemoryFence: + { + assert(baseType == TYP_VOID); + assert(op1 == nullptr); + assert(op2 == nullptr); + emit->emitIns(INS_mfence); + break; + } + case NI_SSE2_MoveMask: { assert(op2 == nullptr); assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE); + ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg); break; } @@ -788,6 +807,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node) assert(op1 == nullptr); assert(op2 == nullptr); + ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType); emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg); break; } diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h index 05f00d283dc1..7ffbcef9c8b3 100644 --- a/src/jit/hwintrinsiclistxarch.h +++ b/src/jit/hwintrinsiclistxarch.h @@ -150,9 +150,11 @@ HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation, "ConvertToV HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single, "ConvertToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg) HARDWARE_INTRINSIC(SSE2_Divide, "Divide", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAlignedVector128", SSE2, -1, 16, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_Min, "Min", SSE2, -1, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_Special, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromArg) diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp index 64101e90a638..0fc225160817 100644 --- a/src/jit/hwintrinsicxarch.cpp +++ b/src/jit/hwintrinsicxarch.cpp @@ -736,6 +736,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, var_types baseType = TYP_UNKNOWN; var_types retType = TYP_UNKNOWN; + assert((simdSize == 16) || (simdSize == 0)); + switch (intrinsic) { case NI_SSE2_CompareLessThan: @@ -754,6 +756,17 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic, } break; + case NI_SSE2_LoadFence: + case NI_SSE2_MemoryFence: + { + assert(sig->numArgs == 0); + assert(JITtype2varType(sig->retType) == TYP_VOID); + assert(simdSize == 0); + + retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize); + break; + } + case NI_SSE2_MoveMask: assert(sig->numArgs == 1); retType = JITtype2varType(sig->retType); diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h index eedfd6352b23..49640d576fc8 100644 --- a/src/jit/instrsxarch.h +++ b/src/jit/instrsxarch.h @@ -220,6 +220,8 @@ INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCK INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A)) +INST3( lfence, "lfence" , 0, IUM_RD, 0, 0, 0x000FE8AE, BAD_CODE, BAD_CODE) +INST3( mfence, "mfence" , 0, IUM_RD, 0, 0, 0x000FF0AE, BAD_CODE, BAD_CODE) INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE) INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE) INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE) diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs new file mode 100644 index 000000000000..2336e7ed741c --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + try + { + Sse2.LoadFence(); + } + catch + { + testResult = Fail; + } + } + + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj new file mode 100644 index 000000000000..f96bbfefd634 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj new file mode 100644 index 000000000000..8501d3eea713 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs new file mode 100644 index 000000000000..0a5faf50cdb2 --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs @@ -0,0 +1,38 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; +using System.Runtime.Intrinsics; + +namespace IntelHardwareIntrinsicTest +{ + class Program + { + const int Pass = 100; + const int Fail = 0; + + static unsafe int Main(string[] args) + { + int testResult = Pass; + + if (Sse2.IsSupported) + { + try + { + Sse2.MemoryFence(); + } + catch + { + testResult = Fail; + } + } + + return testResult; + } + } +} diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj new file mode 100644 index 000000000000..0a74823f12fd --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + + + + + + + + + + + diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj new file mode 100644 index 000000000000..40da205747bf --- /dev/null +++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj @@ -0,0 +1,34 @@ + + + + + Debug + AnyCPU + 2.0 + {95DFC527-4DC1-495E-97D7-E94EE1F7140D} + Exe + {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC} + ..\..\ + true + + + + + + + False + + + + None + True + + + + + + + + + +