diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index 55c50360e31d..b9f318fc46f3 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -267,11 +267,20 @@ bool emitter::Is4ByteSSE4Instruction(instruction ins)
bool emitter::TakesVexPrefix(instruction ins)
{
// special case vzeroupper as it requires 2-byte VEX prefix
- // special case sfence and the prefetch instructions as they never take a VEX prefix
- if ((ins == INS_vzeroupper) || (ins == INS_sfence) || (ins == INS_prefetcht0) || (ins == INS_prefetcht1) ||
- (ins == INS_prefetcht2) || (ins == INS_prefetchnta))
+ // special case (l|m|s)fence and the prefetch instructions as they never take a VEX prefix
+ switch (ins)
{
- return false;
+ case INS_lfence:
+ case INS_mfence:
+ case INS_prefetchnta:
+ case INS_prefetcht0:
+ case INS_prefetcht1:
+ case INS_prefetcht2:
+ case INS_sfence:
+ case INS_vzeroupper:
+ return false;
+ default:
+ break;
}
return IsAVXInstruction(ins);
@@ -2474,7 +2483,7 @@ void emitter::emitIns(instruction ins)
ins == INS_sahf || ins == INS_stosb || ins == INS_stosd || ins == INS_stosp
#ifndef LEGACY_BACKEND
// These instructions take zero operands
- || ins == INS_vzeroupper || ins == INS_sfence
+ || ins == INS_vzeroupper || ins == INS_lfence || ins == INS_mfence || ins == INS_sfence
#endif
);
diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp
index e21def9446d6..597a72b020c6 100644
--- a/src/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/jit/hwintrinsiccodegenxarch.cpp
@@ -744,7 +744,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
regNumber targetReg = node->gtRegNum;
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+ instruction ins = INS_invalid;
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
emitter* emit = getEmitter();
@@ -765,6 +765,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
assert(op2 != nullptr);
assert(baseType == TYP_DOUBLE);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
op2Reg = op2->gtRegNum;
ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
@@ -772,11 +773,29 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE2_LoadFence:
+ {
+ assert(baseType == TYP_VOID);
+ assert(op1 == nullptr);
+ assert(op2 == nullptr);
+ emit->emitIns(INS_lfence);
+ break;
+ }
+ case NI_SSE2_MemoryFence:
+ {
+ assert(baseType == TYP_VOID);
+ assert(op1 == nullptr);
+ assert(op2 == nullptr);
+ emit->emitIns(INS_mfence);
+ break;
+ }
+
case NI_SSE2_MoveMask:
{
assert(op2 == nullptr);
assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
break;
}
@@ -788,6 +807,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
assert(op1 == nullptr);
assert(op2 == nullptr);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
break;
}
diff --git a/src/jit/hwintrinsiclistxarch.h b/src/jit/hwintrinsiclistxarch.h
index 05f00d283dc1..7ffbcef9c8b3 100644
--- a/src/jit/hwintrinsiclistxarch.h
+++ b/src/jit/hwintrinsiclistxarch.h
@@ -150,9 +150,11 @@ HARDWARE_INTRINSIC(SSE2_ConvertToVector128Int32WithTruncation, "ConvertToV
HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single, "ConvertToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg)
HARDWARE_INTRINSIC(SSE2_Divide, "Divide", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAlignedVector128", SSE2, -1, 16, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Min, "Min", SSE2, -1, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
diff --git a/src/jit/hwintrinsicxarch.cpp b/src/jit/hwintrinsicxarch.cpp
index 64101e90a638..0fc225160817 100644
--- a/src/jit/hwintrinsicxarch.cpp
+++ b/src/jit/hwintrinsicxarch.cpp
@@ -736,6 +736,8 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
var_types baseType = TYP_UNKNOWN;
var_types retType = TYP_UNKNOWN;
+ assert((simdSize == 16) || (simdSize == 0));
+
switch (intrinsic)
{
case NI_SSE2_CompareLessThan:
@@ -754,6 +756,17 @@ GenTree* Compiler::impSSE2Intrinsic(NamedIntrinsic intrinsic,
}
break;
+ case NI_SSE2_LoadFence:
+ case NI_SSE2_MemoryFence:
+ {
+ assert(sig->numArgs == 0);
+ assert(JITtype2varType(sig->retType) == TYP_VOID);
+ assert(simdSize == 0);
+
+ retNode = gtNewSimdHWIntrinsicNode(TYP_VOID, intrinsic, TYP_VOID, simdSize);
+ break;
+ }
+
case NI_SSE2_MoveMask:
assert(sig->numArgs == 1);
retType = JITtype2varType(sig->retType);
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index eedfd6352b23..49640d576fc8 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -220,6 +220,8 @@ INST3( shufpd, "shufpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCK
INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A))
+INST3( lfence, "lfence" , 0, IUM_RD, 0, 0, 0x000FE8AE, BAD_CODE, BAD_CODE)
+INST3( mfence, "mfence" , 0, IUM_RD, 0, 0, 0x000FF0AE, BAD_CODE, BAD_CODE)
INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE)
INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE)
INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE)
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs
new file mode 100644
index 000000000000..2336e7ed741c
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence.cs
@@ -0,0 +1,38 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program // Smoke test: invokes Sse2.LoadFence() once and returns Pass or Fail from Main depending on whether it threw.
+ {
+ const int Pass = 100; // returned by Main when the call did not throw, or when SSE2 is not supported
+ const int Fail = 0; // returned by Main when the call threw
+
+ static unsafe int Main(string[] args) // args is not read; the return value is testResult
+ {
+ int testResult = Pass; // start from success; only an exception below changes it
+
+ if (Sse2.IsSupported) // when SSE2 is unavailable the call is skipped and the test still returns Pass
+ {
+ try
+ {
+ Sse2.LoadFence(); // takes no arguments and yields no value, so the only thing checked is that it does not throw
+ }
+ catch // bare catch: an exception of any type is treated as failure
+ {
+ testResult = Fail;
+ }
+ }
+
+ return testResult;
+ }
+ }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj
new file mode 100644
index 000000000000..f96bbfefd634
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_r.csproj
@@ -0,0 +1,34 @@
+
+
+
+
+ Debug
+ AnyCPU
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+ true
+
+
+
+
+
+
+ False
+
+
+
+ None
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj
new file mode 100644
index 000000000000..8501d3eea713
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/LoadFence_ro.csproj
@@ -0,0 +1,34 @@
+
+
+
+
+ Debug
+ AnyCPU
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+ true
+
+
+
+
+
+
+ False
+
+
+
+ None
+ True
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs
new file mode 100644
index 000000000000..0a5faf50cdb2
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence.cs
@@ -0,0 +1,38 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+using System.Runtime.Intrinsics;
+
+namespace IntelHardwareIntrinsicTest
+{
+ class Program // Smoke test: invokes Sse2.MemoryFence() once and returns Pass or Fail from Main depending on whether it threw.
+ {
+ const int Pass = 100; // returned by Main when the call did not throw, or when SSE2 is not supported
+ const int Fail = 0; // returned by Main when the call threw
+
+ static unsafe int Main(string[] args) // args is not read; the return value is testResult
+ {
+ int testResult = Pass; // start from success; only an exception below changes it
+
+ if (Sse2.IsSupported) // when SSE2 is unavailable the call is skipped and the test still returns Pass
+ {
+ try
+ {
+ Sse2.MemoryFence(); // takes no arguments and yields no value, so the only thing checked is that it does not throw
+ }
+ catch // bare catch: an exception of any type is treated as failure
+ {
+ testResult = Fail;
+ }
+ }
+
+ return testResult;
+ }
+ }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj
new file mode 100644
index 000000000000..0a74823f12fd
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_r.csproj
@@ -0,0 +1,34 @@
+
+
+
+
+ Debug
+ AnyCPU
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+ true
+
+
+
+
+
+
+ False
+
+
+
+ None
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj
new file mode 100644
index 000000000000..40da205747bf
--- /dev/null
+++ b/tests/src/JIT/HardwareIntrinsics/X86/Sse2/MemoryFence_ro.csproj
@@ -0,0 +1,34 @@
+
+
+
+
+ Debug
+ AnyCPU
+ 2.0
+ {95DFC527-4DC1-495E-97D7-E94EE1F7140D}
+ Exe
+ {786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}
+ ..\..\
+ true
+
+
+
+
+
+
+ False
+
+
+
+ None
+ True
+
+
+
+
+
+
+
+
+
+