Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LRINT, MVT::v4f32, Custom);
setOperationAction(ISD::LRINT, MVT::v2i32, Custom);

// Route i128 bitwise logic through custom lowering; this pairs with the
// AND/OR/XOR case in ReplaceNodeResults, which may perform the operation as a
// vector op instead of splitting it.
setOperationAction(ISD::AND, MVT::i128, Custom);
setOperationAction(ISD::OR, MVT::i128, Custom);
setOperationAction(ISD::XOR, MVT::i128, Custom);

for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
Expand Down Expand Up @@ -1481,6 +1485,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::LRINT, MVT::v8f32, Custom);
setOperationAction(ISD::LRINT, MVT::v4f64, Custom);

// Route i256 bitwise logic through custom lowering; this pairs with the
// AND/OR/XOR case in ReplaceNodeResults, which may perform the operation as a
// vector op instead of splitting it.
setOperationAction(ISD::AND, MVT::i256, Custom);
setOperationAction(ISD::OR, MVT::i256, Custom);
setOperationAction(ISD::XOR, MVT::i256, Custom);

// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
// even though v8i16 is a legal type.
setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
Expand Down Expand Up @@ -1836,6 +1844,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasDQI())
setOperationAction(ISD::LLRINT, MVT::v8f64, Legal);

// Route i512 bitwise logic through custom lowering; this pairs with the
// AND/OR/XOR case in ReplaceNodeResults, which may perform the operation as a
// vector op instead of splitting it.
setOperationAction(ISD::AND, MVT::i512, Custom);
setOperationAction(ISD::OR, MVT::i512, Custom);
setOperationAction(ISD::XOR, MVT::i512, Custom);

for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
Expand Down Expand Up @@ -33926,6 +33938,23 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case X86ISD::CVTPS2PH:
Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG));
return;
// Wide scalar bitwise logic (i128/i256/i512): when both operands pass
// mayFoldIntoVector, redo the operation on an equally sized vector of i64 and
// bitcast the result back to the original scalar type.
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
assert((VT == MVT::i128 || VT == MVT::i256 || VT == MVT::i512) &&
"Unexpected VT!");
// See if this is free to perform on the vector unit to avoid splitting.
// VecVT has the same total width as VT: one i64 element per 64 bits.
MVT VecVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
// If either operand is rejected, return without pushing a result.
// NOTE(review): presumably the type legalizer then falls back to its default
// handling (splitting the wide integer) - confirm against the caller.
if (!mayFoldIntoVector(N0, Subtarget) || !mayFoldIntoVector(N1, Subtarget))
return;
// Opc and dl are declared outside this case.
// NOTE(review): presumably N's opcode and debug location - confirm.
SDValue Op = DAG.getNode(Opc, dl, VecVT, DAG.getBitcast(VecVT, N0),
DAG.getBitcast(VecVT, N1));
Results.push_back(DAG.getBitcast(VT, Op));
return;
}
case ISD::CTPOP: {
assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
// If we have at most 32 active bits, then perform as i32 CTPOP.
Expand Down
258 changes: 194 additions & 64 deletions llvm/test/CodeGen/X86/elementwise-store-of-scalar-splat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -621,17 +621,41 @@ define void @vec256_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
}

define void @vec256_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
; ALL-LABEL: vec256_i128:
; ALL: # %bb.0:
; ALL-NEXT: movq (%rdi), %rax
; ALL-NEXT: movq 8(%rdi), %rcx
; ALL-NEXT: notq %rcx
; ALL-NEXT: notq %rax
; ALL-NEXT: movq %rax, (%rsi)
; ALL-NEXT: movq %rcx, 8(%rsi)
; ALL-NEXT: movq %rcx, 24(%rsi)
; ALL-NEXT: movq %rax, 16(%rsi)
; ALL-NEXT: retq
; SCALAR-LABEL: vec256_i128:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rcx, 24(%rsi)
; SCALAR-NEXT: movq %rax, 16(%rsi)
; SCALAR-NEXT: retq
;
; SSE-LABEL: vec256_i128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor (%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rsi)
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: vec256_i128:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, 16(%rsi)
; AVX-NEXT: vmovdqa %xmm0, (%rsi)
; AVX-NEXT: retq
;
; AVX512-LABEL: vec256_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX512-NEXT: vmovdqa %xmm0, 16(%rsi)
; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
; AVX512-NEXT: retq
%in.elt.not = load i128, ptr %in.elt.ptr, align 64
%in.elt = xor i128 %in.elt.not, -1
%out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
Expand Down Expand Up @@ -1034,19 +1058,46 @@ define void @vec384_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
}

define void @vec384_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
; ALL-LABEL: vec384_i128:
; ALL: # %bb.0:
; ALL-NEXT: movq (%rdi), %rax
; ALL-NEXT: movq 8(%rdi), %rcx
; ALL-NEXT: notq %rcx
; ALL-NEXT: notq %rax
; ALL-NEXT: movq %rax, (%rsi)
; ALL-NEXT: movq %rcx, 8(%rsi)
; ALL-NEXT: movq %rcx, 24(%rsi)
; ALL-NEXT: movq %rax, 16(%rsi)
; ALL-NEXT: movq %rcx, 40(%rsi)
; ALL-NEXT: movq %rax, 32(%rsi)
; ALL-NEXT: retq
; SCALAR-LABEL: vec384_i128:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rcx, 24(%rsi)
; SCALAR-NEXT: movq %rax, 16(%rsi)
; SCALAR-NEXT: movq %rcx, 40(%rsi)
; SCALAR-NEXT: movq %rax, 32(%rsi)
; SCALAR-NEXT: retq
;
; SSE-LABEL: vec384_i128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor (%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rsi)
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
; SSE-NEXT: movdqa %xmm0, 32(%rsi)
; SSE-NEXT: retq
;
; AVX-LABEL: vec384_i128:
; AVX: # %bb.0:
; AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, (%rsi)
; AVX-NEXT: vmovdqa %xmm0, 16(%rsi)
; AVX-NEXT: vmovdqa %xmm0, 32(%rsi)
; AVX-NEXT: retq
;
; AVX512-LABEL: vec384_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
; AVX512-NEXT: vmovdqa %xmm0, 16(%rsi)
; AVX512-NEXT: vmovdqa %xmm0, 32(%rsi)
; AVX512-NEXT: retq
%in.elt.not = load i128, ptr %in.elt.ptr, align 64
%in.elt = xor i128 %in.elt.not, -1
%out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
Expand Down Expand Up @@ -1559,21 +1610,60 @@ define void @vec512_double(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
}

define void @vec512_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
; ALL-LABEL: vec512_i128:
; ALL: # %bb.0:
; ALL-NEXT: movq (%rdi), %rax
; ALL-NEXT: movq 8(%rdi), %rcx
; ALL-NEXT: notq %rcx
; ALL-NEXT: notq %rax
; ALL-NEXT: movq %rax, (%rsi)
; ALL-NEXT: movq %rcx, 8(%rsi)
; ALL-NEXT: movq %rcx, 24(%rsi)
; ALL-NEXT: movq %rax, 16(%rsi)
; ALL-NEXT: movq %rcx, 40(%rsi)
; ALL-NEXT: movq %rax, 32(%rsi)
; ALL-NEXT: movq %rcx, 56(%rsi)
; ALL-NEXT: movq %rax, 48(%rsi)
; ALL-NEXT: retq
; SCALAR-LABEL: vec512_i128:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq (%rdi), %rax
; SCALAR-NEXT: movq 8(%rdi), %rcx
; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, (%rsi)
; SCALAR-NEXT: movq %rcx, 8(%rsi)
; SCALAR-NEXT: movq %rcx, 24(%rsi)
; SCALAR-NEXT: movq %rax, 16(%rsi)
; SCALAR-NEXT: movq %rcx, 40(%rsi)
; SCALAR-NEXT: movq %rax, 32(%rsi)
; SCALAR-NEXT: movq %rcx, 56(%rsi)
; SCALAR-NEXT: movq %rax, 48(%rsi)
; SCALAR-NEXT: retq
;
; SSE-LABEL: vec512_i128:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: pxor (%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, (%rsi)
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
; SSE-NEXT: movdqa %xmm0, 32(%rsi)
; SSE-NEXT: movdqa %xmm0, 48(%rsi)
; SSE-NEXT: retq
;
; AVX1-LABEL: vec512_i128:
; AVX1: # %bb.0:
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps %ymm0, (%rsi)
; AVX1-NEXT: vmovaps %ymm0, 32(%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: vec512_i128:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX2-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: vmovdqa %ymm0, (%rsi)
; AVX2-NEXT: vmovdqa %ymm0, 32(%rsi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: vec512_i128:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vpxor (%rdi), %xmm0, %xmm0
; AVX512-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1]
; AVX512-NEXT: vmovdqa64 %zmm0, (%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%in.elt.not = load i128, ptr %in.elt.ptr, align 64
%in.elt = xor i128 %in.elt.not, -1
%out.elt0.ptr = getelementptr i128, ptr %out.vec.ptr, i64 0
Expand All @@ -1588,25 +1678,71 @@ define void @vec512_i128(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
}

define void @vec512_i256(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
; ALL-LABEL: vec512_i256:
; ALL: # %bb.0:
; ALL-NEXT: movq 16(%rdi), %rax
; ALL-NEXT: movq 24(%rdi), %rcx
; ALL-NEXT: movq (%rdi), %rdx
; ALL-NEXT: movq 8(%rdi), %rdi
; ALL-NEXT: notq %rdi
; ALL-NEXT: notq %rdx
; ALL-NEXT: notq %rcx
; ALL-NEXT: notq %rax
; ALL-NEXT: movq %rax, 16(%rsi)
; ALL-NEXT: movq %rcx, 24(%rsi)
; ALL-NEXT: movq %rdx, (%rsi)
; ALL-NEXT: movq %rdi, 8(%rsi)
; ALL-NEXT: movq %rax, 48(%rsi)
; ALL-NEXT: movq %rcx, 56(%rsi)
; ALL-NEXT: movq %rdx, 32(%rsi)
; ALL-NEXT: movq %rdi, 40(%rsi)
; ALL-NEXT: retq
; SCALAR-LABEL: vec512_i256:
; SCALAR: # %bb.0:
; SCALAR-NEXT: movq 16(%rdi), %rax
; SCALAR-NEXT: movq 24(%rdi), %rcx
; SCALAR-NEXT: movq (%rdi), %rdx
; SCALAR-NEXT: movq 8(%rdi), %rdi
; SCALAR-NEXT: notq %rdi
; SCALAR-NEXT: notq %rdx
; SCALAR-NEXT: notq %rcx
; SCALAR-NEXT: notq %rax
; SCALAR-NEXT: movq %rax, 16(%rsi)
; SCALAR-NEXT: movq %rcx, 24(%rsi)
; SCALAR-NEXT: movq %rdx, (%rsi)
; SCALAR-NEXT: movq %rdi, 8(%rsi)
; SCALAR-NEXT: movq %rax, 48(%rsi)
; SCALAR-NEXT: movq %rcx, 56(%rsi)
; SCALAR-NEXT: movq %rdx, 32(%rsi)
; SCALAR-NEXT: movq %rdi, 40(%rsi)
; SCALAR-NEXT: retq
;
; SSE-LABEL: vec512_i256:
; SSE: # %bb.0:
; SSE-NEXT: pcmpeqd %xmm0, %xmm0
; SSE-NEXT: movdqa (%rdi), %xmm1
; SSE-NEXT: pxor %xmm0, %xmm1
; SSE-NEXT: pxor 16(%rdi), %xmm0
; SSE-NEXT: movdqa %xmm0, 16(%rsi)
; SSE-NEXT: movdqa %xmm1, (%rsi)
; SSE-NEXT: movdqa %xmm1, 32(%rsi)
; SSE-NEXT: movdqa %xmm0, 48(%rsi)
; SSE-NEXT: retq
;
; AVX1-LABEL: vec512_i256:
; AVX1: # %bb.0:
; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX1-NEXT: vcmptrueps %ymm0, %ymm0, %ymm0
; AVX1-NEXT: vxorps (%rdi), %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm0, 16(%rsi)
; AVX1-NEXT: vmovaps %xmm0, (%rsi)
; AVX1-NEXT: vextractf128 $1, %ymm0, 48(%rsi)
; AVX1-NEXT: vmovaps %xmm0, 32(%rsi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: vec512_i256:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX2-NEXT: vpxor (%rdi), %ymm0, %ymm0
; AVX2-NEXT: vextracti128 $1, %ymm0, 16(%rsi)
; AVX2-NEXT: vmovdqa %xmm0, (%rsi)
; AVX2-NEXT: vextracti128 $1, %ymm0, 48(%rsi)
; AVX2-NEXT: vmovdqa %xmm0, 32(%rsi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: vec512_i256:
; AVX512: # %bb.0:
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vpxor (%rdi), %ymm0, %ymm0
; AVX512-NEXT: vextracti128 $1, %ymm0, 16(%rsi)
; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
; AVX512-NEXT: vextracti128 $1, %ymm0, 48(%rsi)
; AVX512-NEXT: vmovdqa %xmm0, 32(%rsi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
%in.elt.not = load i256, ptr %in.elt.ptr, align 64
%in.elt = xor i256 %in.elt.not, -1
%out.elt0.ptr = getelementptr i256, ptr %out.vec.ptr, i64 0
Expand All @@ -1616,14 +1752,8 @@ define void @vec512_i256(ptr %in.elt.ptr, ptr %out.vec.ptr) nounwind {
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; AVX: {{.*}}
; AVX1: {{.*}}
; AVX2: {{.*}}
; AVX512: {{.*}}
; AVX512BW: {{.*}}
; AVX512F: {{.*}}
; SCALAR: {{.*}}
; SSE: {{.*}}
; SSE2: {{.*}}
; SSE2-ONLY: {{.*}}
; SSE3: {{.*}}
Expand Down
30 changes: 8 additions & 22 deletions llvm/test/CodeGen/X86/pr166744.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,11 @@ define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) {
; POSTRA-NEXT: btrl %esi, %ecx
; POSTRA-NEXT: orl %ecx, %edx
; POSTRA-NEXT: movl %edx, (%rdi,%rax,4)
; POSTRA-NEXT: movq 16(%rdi), %rax
; POSTRA-NEXT: movq (%rdi), %rcx
; POSTRA-NEXT: movq 24(%rdi), %rdx
; POSTRA-NEXT: movq 8(%rdi), %rsi
; POSTRA-NEXT: orq 56(%rdi), %rdx
; POSTRA-NEXT: orq 40(%rdi), %rsi
; POSTRA-NEXT: orq 48(%rdi), %rax
; POSTRA-NEXT: orq 32(%rdi), %rcx
; POSTRA-NEXT: orq %rdx, %rsi
; POSTRA-NEXT: orq %rax, %rcx
; POSTRA-NEXT: orq %rsi, %rcx
; POSTRA-NEXT: vmovdqu (%rdi), %ymm0
; POSTRA-NEXT: vpor 32(%rdi), %ymm0, %ymm0
; POSTRA-NEXT: vptest %ymm0, %ymm0
; POSTRA-NEXT: setne %al
; POSTRA-NEXT: vzeroupper
; POSTRA-NEXT: retq
;
; NOPOSTRA-LABEL: PR166744:
Expand All @@ -38,18 +31,11 @@ define i1 @PR166744(ptr %v, i64 %idx, i1 zeroext %b) {
; NOPOSTRA-NEXT: shlxl %eax, %edx, %eax
; NOPOSTRA-NEXT: orl %ecx, %eax
; NOPOSTRA-NEXT: movl %eax, (%rdi,%rsi)
; NOPOSTRA-NEXT: movq 16(%rdi), %rax
; NOPOSTRA-NEXT: movq (%rdi), %rcx
; NOPOSTRA-NEXT: movq 8(%rdi), %rdx
; NOPOSTRA-NEXT: movq 24(%rdi), %rsi
; NOPOSTRA-NEXT: orq 56(%rdi), %rsi
; NOPOSTRA-NEXT: orq 40(%rdi), %rdx
; NOPOSTRA-NEXT: orq 48(%rdi), %rax
; NOPOSTRA-NEXT: orq 32(%rdi), %rcx
; NOPOSTRA-NEXT: orq %rsi, %rdx
; NOPOSTRA-NEXT: orq %rax, %rcx
; NOPOSTRA-NEXT: orq %rdx, %rcx
; NOPOSTRA-NEXT: vmovdqu (%rdi), %ymm0
; NOPOSTRA-NEXT: vpor 32(%rdi), %ymm0, %ymm0
; NOPOSTRA-NEXT: vptest %ymm0, %ymm0
; NOPOSTRA-NEXT: setne %al
; NOPOSTRA-NEXT: vzeroupper
; NOPOSTRA-NEXT: retq
%rem = and i64 %idx, 511
%sh_prom = zext nneg i64 %rem to i512
Expand Down
Loading
Loading