Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Dec 9, 2025

No description provided.

@RKSimon RKSimon enabled auto-merge (squash) December 9, 2025 17:49
@llvmbot
Copy link
Member

llvmbot commented Dec 9, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Patch is 63.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171481.diff

1 Files Affected:

  • (modified) llvm/test/CodeGen/X86/bitcnt-big-integer.ll (+1634-22)
diff --git a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
index 0fd555991ae29..749b3ddc96d0d 100644
--- a/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
+++ b/llvm/test/CodeGen/X86/bitcnt-big-integer.ll
@@ -52,6 +52,63 @@ define i32 @load_ctpop_i128(ptr %p0) nounwind {
   ret i32 %res
 }
 
+define i32 @vector_ctpop_i128(<4 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    pextrq $1, %xmm0, %rcx
+; SSE-NEXT:    popcntq %rcx, %rcx
+; SSE-NEXT:    popcntq %rax, %rax
+; SSE-NEXT:    addl %ecx, %eax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: vector_ctpop_i128:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    vmovq %xmm0, %rcx
+; AVX2-NEXT:    popcntq %rax, %rdx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    popcntq %rcx, %rax
+; AVX2-NEXT:    addl %edx, %eax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: vector_ctpop_i128:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT:    vmovq %xmm0, %rcx
+; AVX512F-NEXT:    popcntq %rax, %rdx
+; AVX512F-NEXT:    popcntq %rcx, %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: vector_ctpop_i128:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT:    popcntq %rcx, %rcx
+; AVX512VL-NEXT:    popcntq %rax, %rax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i128:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rcx
+; AVX512POPCNT-NEXT:    popcntq %rax, %rax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
+  %a0 = bitcast <4 x i32> %v0 to i128
+  %cnt = call i128 @llvm.ctpop.i128(i128 %a0)
+  %res = trunc i128 %cnt to i32
+  ret i32 %res
+}
+
 define i32 @test_ctpop_i256(i256 %a0) nounwind {
 ; CHECK-LABEL: test_ctpop_i256:
 ; CHECK:       # %bb.0:
@@ -183,6 +240,107 @@ define i32 @load_ctpop_i256(ptr %p0) nounwind {
   ret i32 %res
 }
 
+define i32 @vector_ctpop_i256(<8 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i256:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pextrq $1, %xmm0, %rax
+; SSE-NEXT:    movq %xmm0, %rcx
+; SSE-NEXT:    movq %xmm1, %rdx
+; SSE-NEXT:    pextrq $1, %xmm1, %rsi
+; SSE-NEXT:    popcntq %rsi, %rsi
+; SSE-NEXT:    popcntq %rdx, %rdx
+; SSE-NEXT:    addl %esi, %edx
+; SSE-NEXT:    xorl %esi, %esi
+; SSE-NEXT:    popcntq %rax, %rsi
+; SSE-NEXT:    xorl %eax, %eax
+; SSE-NEXT:    popcntq %rcx, %rax
+; SSE-NEXT:    addl %esi, %eax
+; SSE-NEXT:    addl %edx, %eax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: vector_ctpop_i256:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX2-NEXT:    vmovq %xmm0, %rcx
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rdx
+; AVX2-NEXT:    vmovq %xmm0, %rsi
+; AVX2-NEXT:    popcntq %rdx, %rdx
+; AVX2-NEXT:    popcntq %rsi, %rsi
+; AVX2-NEXT:    addl %edx, %esi
+; AVX2-NEXT:    xorl %edx, %edx
+; AVX2-NEXT:    popcntq %rax, %rdx
+; AVX2-NEXT:    xorl %eax, %eax
+; AVX2-NEXT:    popcntq %rcx, %rax
+; AVX2-NEXT:    addl %edx, %eax
+; AVX2-NEXT:    addl %esi, %eax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: vector_ctpop_i256:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT:    vmovq %xmm0, %rcx
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rdx
+; AVX512F-NEXT:    vmovq %xmm0, %rsi
+; AVX512F-NEXT:    popcntq %rdx, %rdx
+; AVX512F-NEXT:    popcntq %rsi, %rsi
+; AVX512F-NEXT:    addl %edx, %esi
+; AVX512F-NEXT:    popcntq %rax, %rdx
+; AVX512F-NEXT:    popcntq %rcx, %rax
+; AVX512F-NEXT:    addl %edx, %eax
+; AVX512F-NEXT:    addl %esi, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: vector_ctpop_i256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512VL-NEXT:    vmovq %xmm0, %rcx
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rdx
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX512VL-NEXT:    popcntq %rsi, %rsi
+; AVX512VL-NEXT:    popcntq %rdx, %rdx
+; AVX512VL-NEXT:    addl %esi, %edx
+; AVX512VL-NEXT:    xorl %esi, %esi
+; AVX512VL-NEXT:    popcntq %rax, %rsi
+; AVX512VL-NEXT:    xorl %eax, %eax
+; AVX512VL-NEXT:    popcntq %rcx, %rax
+; AVX512VL-NEXT:    addl %esi, %eax
+; AVX512VL-NEXT:    addl %edx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i256:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rax
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rcx
+; AVX512POPCNT-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rdx
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX512POPCNT-NEXT:    popcntq %rsi, %rsi
+; AVX512POPCNT-NEXT:    popcntq %rdx, %rdx
+; AVX512POPCNT-NEXT:    addl %esi, %edx
+; AVX512POPCNT-NEXT:    xorl %esi, %esi
+; AVX512POPCNT-NEXT:    popcntq %rax, %rsi
+; AVX512POPCNT-NEXT:    xorl %eax, %eax
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rax
+; AVX512POPCNT-NEXT:    addl %esi, %eax
+; AVX512POPCNT-NEXT:    addl %edx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    vzeroupper
+; AVX512POPCNT-NEXT:    retq
+  %a0 = bitcast <8 x i32> %v0 to i256
+  %cnt = call i256 @llvm.ctpop.i256(i256 %a0)
+  %res = trunc i256 %cnt to i32
+  ret i32 %res
+}
+
 define i32 @test_ctpop_i512(i512 %a0) nounwind {
 ; CHECK-LABEL: test_ctpop_i512:
 ; CHECK:       # %bb.0:
@@ -404,6 +562,166 @@ define i32 @load_ctpop_i512(ptr %p0) nounwind {
   ret i32 %res
 }
 
+define i32 @vector_ctpop_i512(<16 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctpop_i512:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq %xmm0, %rax
+; SSE-NEXT:    pextrq $1, %xmm0, %rcx
+; SSE-NEXT:    movq %xmm1, %rdx
+; SSE-NEXT:    pextrq $1, %xmm1, %rsi
+; SSE-NEXT:    pextrq $1, %xmm2, %rdi
+; SSE-NEXT:    movq %xmm2, %r8
+; SSE-NEXT:    movq %xmm3, %r9
+; SSE-NEXT:    pextrq $1, %xmm3, %r10
+; SSE-NEXT:    popcntq %r10, %r10
+; SSE-NEXT:    popcntq %r9, %r9
+; SSE-NEXT:    addl %r10d, %r9d
+; SSE-NEXT:    popcntq %rdi, %rdi
+; SSE-NEXT:    popcntq %r8, %r8
+; SSE-NEXT:    addl %edi, %r8d
+; SSE-NEXT:    addl %r9d, %r8d
+; SSE-NEXT:    popcntq %rsi, %rsi
+; SSE-NEXT:    popcntq %rdx, %rdx
+; SSE-NEXT:    addl %esi, %edx
+; SSE-NEXT:    popcntq %rcx, %rcx
+; SSE-NEXT:    popcntq %rax, %rax
+; SSE-NEXT:    addl %ecx, %eax
+; SSE-NEXT:    addl %edx, %eax
+; SSE-NEXT:    addl %r8d, %eax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: vector_ctpop_i512:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovq %xmm0, %rdx
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT:    vpextrq $1, %xmm1, %rdi
+; AVX2-NEXT:    vmovq %xmm1, %r8
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm0
+; AVX2-NEXT:    vpextrq $1, %xmm0, %r9
+; AVX2-NEXT:    vmovq %xmm0, %r10
+; AVX2-NEXT:    popcntq %r9, %r9
+; AVX2-NEXT:    popcntq %r10, %r10
+; AVX2-NEXT:    addl %r9d, %r10d
+; AVX2-NEXT:    popcntq %rdi, %rdi
+; AVX2-NEXT:    popcntq %r8, %r8
+; AVX2-NEXT:    addl %edi, %r8d
+; AVX2-NEXT:    addl %r10d, %r8d
+; AVX2-NEXT:    popcntq %rsi, %rsi
+; AVX2-NEXT:    popcntq %rdx, %rdx
+; AVX2-NEXT:    addl %esi, %edx
+; AVX2-NEXT:    popcntq %rcx, %rcx
+; AVX2-NEXT:    popcntq %rax, %rax
+; AVX2-NEXT:    addl %ecx, %eax
+; AVX2-NEXT:    addl %edx, %eax
+; AVX2-NEXT:    addl %r8d, %eax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: vector_ctpop_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vmovq %xmm1, %rax
+; AVX512F-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rdx
+; AVX512F-NEXT:    vmovq %xmm0, %rsi
+; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT:    vpextrq $1, %xmm1, %rdi
+; AVX512F-NEXT:    vmovq %xmm1, %r8
+; AVX512F-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %r9
+; AVX512F-NEXT:    vmovq %xmm0, %r10
+; AVX512F-NEXT:    popcntq %r9, %r9
+; AVX512F-NEXT:    popcntq %r10, %r10
+; AVX512F-NEXT:    addl %r9d, %r10d
+; AVX512F-NEXT:    popcntq %rdi, %rdi
+; AVX512F-NEXT:    popcntq %r8, %r8
+; AVX512F-NEXT:    addl %edi, %r8d
+; AVX512F-NEXT:    addl %r10d, %r8d
+; AVX512F-NEXT:    popcntq %rdx, %rdx
+; AVX512F-NEXT:    popcntq %rsi, %rsi
+; AVX512F-NEXT:    addl %edx, %esi
+; AVX512F-NEXT:    popcntq %rcx, %rcx
+; AVX512F-NEXT:    popcntq %rax, %rax
+; AVX512F-NEXT:    addl %ecx, %eax
+; AVX512F-NEXT:    addl %esi, %eax
+; AVX512F-NEXT:    addl %r8d, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: vector_ctpop_i512:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512VL-NEXT:    vmovq %xmm1, %rax
+; AVX512VL-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rdx
+; AVX512VL-NEXT:    vmovq %xmm0, %rsi
+; AVX512VL-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512VL-NEXT:    vmovq %xmm1, %rdi
+; AVX512VL-NEXT:    vpextrq $1, %xmm1, %r8
+; AVX512VL-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %r9
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %r10
+; AVX512VL-NEXT:    popcntq %r10, %r10
+; AVX512VL-NEXT:    popcntq %r9, %r9
+; AVX512VL-NEXT:    addl %r10d, %r9d
+; AVX512VL-NEXT:    popcntq %r8, %r8
+; AVX512VL-NEXT:    popcntq %rdi, %rdi
+; AVX512VL-NEXT:    addl %r8d, %edi
+; AVX512VL-NEXT:    addl %r9d, %edi
+; AVX512VL-NEXT:    popcntq %rdx, %rdx
+; AVX512VL-NEXT:    popcntq %rsi, %rsi
+; AVX512VL-NEXT:    addl %edx, %esi
+; AVX512VL-NEXT:    popcntq %rcx, %rcx
+; AVX512VL-NEXT:    popcntq %rax, %rax
+; AVX512VL-NEXT:    addl %ecx, %eax
+; AVX512VL-NEXT:    addl %esi, %eax
+; AVX512VL-NEXT:    addl %edi, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: vector_ctpop_i512:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512POPCNT-NEXT:    vmovq %xmm1, %rax
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm1, %rcx
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rdx
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rsi
+; AVX512POPCNT-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512POPCNT-NEXT:    vmovq %xmm1, %rdi
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm1, %r8
+; AVX512POPCNT-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %r9
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %r10
+; AVX512POPCNT-NEXT:    popcntq %r10, %r10
+; AVX512POPCNT-NEXT:    popcntq %r9, %r9
+; AVX512POPCNT-NEXT:    addl %r10d, %r9d
+; AVX512POPCNT-NEXT:    popcntq %r8, %r8
+; AVX512POPCNT-NEXT:    popcntq %rdi, %rdi
+; AVX512POPCNT-NEXT:    addl %r8d, %edi
+; AVX512POPCNT-NEXT:    addl %r9d, %edi
+; AVX512POPCNT-NEXT:    popcntq %rdx, %rdx
+; AVX512POPCNT-NEXT:    popcntq %rsi, %rsi
+; AVX512POPCNT-NEXT:    addl %edx, %esi
+; AVX512POPCNT-NEXT:    popcntq %rcx, %rcx
+; AVX512POPCNT-NEXT:    popcntq %rax, %rax
+; AVX512POPCNT-NEXT:    addl %ecx, %eax
+; AVX512POPCNT-NEXT:    addl %esi, %eax
+; AVX512POPCNT-NEXT:    addl %edi, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    vzeroupper
+; AVX512POPCNT-NEXT:    retq
+  %a0 = bitcast <16 x i32> %v0 to i512
+  %cnt = call i512 @llvm.ctpop.i512(i512 %a0)
+  %res = trunc i512 %cnt to i32
+  ret i32 %res
+}
+
 define i32 @test_ctpop_i1024(i1024 %a0) nounwind {
 ; SSE-LABEL: test_ctpop_i1024:
 ; SSE:       # %bb.0:
@@ -969,6 +1287,75 @@ define i32 @load_ctlz_i128(ptr %p0) nounwind {
   ret i32 %res
 }
 
+define i32 @vector_ctlz_i128(<4 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctlz_i128:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq %xmm0, %rcx
+; SSE-NEXT:    pextrq $1, %xmm0, %rdx
+; SSE-NEXT:    bsrq %rdx, %rsi
+; SSE-NEXT:    xorl $63, %esi
+; SSE-NEXT:    movl $127, %eax
+; SSE-NEXT:    bsrq %rcx, %rax
+; SSE-NEXT:    xorl $63, %eax
+; SSE-NEXT:    addl $64, %eax
+; SSE-NEXT:    testq %rdx, %rdx
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: vector_ctlz_i128:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT:    lzcntq %rcx, %rdx
+; AVX2-NEXT:    lzcntq %rax, %rax
+; AVX2-NEXT:    addl $64, %eax
+; AVX2-NEXT:    testq %rcx, %rcx
+; AVX2-NEXT:    cmovnel %edx, %eax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: vector_ctlz_i128:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovq %xmm0, %rax
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT:    lzcntq %rcx, %rdx
+; AVX512F-NEXT:    lzcntq %rax, %rax
+; AVX512F-NEXT:    addl $64, %eax
+; AVX512F-NEXT:    testq %rcx, %rcx
+; AVX512F-NEXT:    cmovnel %edx, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: vector_ctlz_i128:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    lzcntq %rcx, %rdx
+; AVX512VL-NEXT:    lzcntq %rax, %rax
+; AVX512VL-NEXT:    addl $64, %eax
+; AVX512VL-NEXT:    testq %rcx, %rcx
+; AVX512VL-NEXT:    cmovnel %edx, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: vector_ctlz_i128:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT:    lzcntq %rcx, %rdx
+; AVX512POPCNT-NEXT:    lzcntq %rax, %rax
+; AVX512POPCNT-NEXT:    addl $64, %eax
+; AVX512POPCNT-NEXT:    testq %rcx, %rcx
+; AVX512POPCNT-NEXT:    cmovnel %edx, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    retq
+  %a0 = bitcast <4 x i32> %v0 to i128
+  %cnt = call i128 @llvm.ctlz.i128(i128 %a0, i1 0)
+  %res = trunc i128 %cnt to i32
+  ret i32 %res
+}
+
 define i32 @test_ctlz_i256(i256 %a0) nounwind {
 ; SSE-LABEL: test_ctlz_i256:
 ; SSE:       # %bb.0:
@@ -1125,6 +1512,135 @@ define i32 @load_ctlz_i256(ptr %p0) nounwind {
   ret i32 %res
 }
 
+define i32 @vector_ctlz_i256(<8 x i32> %v0) nounwind {
+; SSE-LABEL: vector_ctlz_i256:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movq %xmm0, %rcx
+; SSE-NEXT:    pextrq $1, %xmm0, %rdx
+; SSE-NEXT:    movq %xmm1, %rax
+; SSE-NEXT:    pextrq $1, %xmm1, %rsi
+; SSE-NEXT:    bsrq %rsi, %rdi
+; SSE-NEXT:    xorl $63, %edi
+; SSE-NEXT:    bsrq %rax, %r8
+; SSE-NEXT:    xorl $63, %r8d
+; SSE-NEXT:    orl $64, %r8d
+; SSE-NEXT:    testq %rsi, %rsi
+; SSE-NEXT:    cmovnel %edi, %r8d
+; SSE-NEXT:    bsrq %rdx, %rsi
+; SSE-NEXT:    xorl $63, %esi
+; SSE-NEXT:    movl $127, %eax
+; SSE-NEXT:    bsrq %rcx, %rax
+; SSE-NEXT:    xorl $63, %eax
+; SSE-NEXT:    addl $64, %eax
+; SSE-NEXT:    testq %rdx, %rdx
+; SSE-NEXT:    cmovnel %esi, %eax
+; SSE-NEXT:    subl $-128, %eax
+; SSE-NEXT:    ptest %xmm1, %xmm1
+; SSE-NEXT:    cmovnel %r8d, %eax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX2-LABEL: vector_ctlz_i256:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vmovq %xmm0, %rax
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX2-NEXT:    vmovq %xmm0, %rdx
+; AVX2-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX2-NEXT:    lzcntq %rsi, %rdi
+; AVX2-NEXT:    lzcntq %rdx, %r8
+; AVX2-NEXT:    addl $64, %r8d
+; AVX2-NEXT:    testq %rsi, %rsi
+; AVX2-NEXT:    cmovnel %edi, %r8d
+; AVX2-NEXT:    xorl %edi, %edi
+; AVX2-NEXT:    lzcntq %rcx, %rdi
+; AVX2-NEXT:    lzcntq %rax, %rax
+; AVX2-NEXT:    addl $64, %eax
+; AVX2-NEXT:    testq %rcx, %rcx
+; AVX2-NEXT:    cmovnel %edi, %eax
+; AVX2-NEXT:    subl $-128, %eax
+; AVX2-NEXT:    orq %rsi, %rdx
+; AVX2-NEXT:    cmovnel %r8d, %eax
+; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: vector_ctlz_i256:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovq %xmm0, %rax
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT:    vmovq %xmm0, %rdx
+; AVX512F-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX512F-NEXT:    lzcntq %rsi, %rdi
+; AVX512F-NEXT:    lzcntq %rdx, %r8
+; AVX512F-NEXT:    addl $64, %r8d
+; AVX512F-NEXT:    testq %rsi, %rsi
+; AVX512F-NEXT:    cmovnel %edi, %r8d
+; AVX512F-NEXT:    lzcntq %rcx, %rdi
+; AVX512F-NEXT:    lzcntq %rax, %rax
+; AVX512F-NEXT:    addl $64, %eax
+; AVX512F-NEXT:    testq %rcx, %rcx
+; AVX512F-NEXT:    cmovnel %edi, %eax
+; AVX512F-NEXT:    subl $-128, %eax
+; AVX512F-NEXT:    orq %rsi, %rdx
+; AVX512F-NEXT:    cmovnel %r8d, %eax
+; AVX512F-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512F-NEXT:    retq
+;
+; AVX512VL-LABEL: vector_ctlz_i256:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512VL-NEXT:    vmovq %xmm0, %rax
+; AVX512VL-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512VL-NEXT:    vmovq %xmm0, %rdx
+; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX512VL-NEXT:    lzcntq %rsi, %rdi
+; AVX512VL-NEXT:    lzcntq %rdx, %r8
+; AVX512VL-NEXT:    addl $64, %r8d
+; AVX512VL-NEXT:    testq %rsi, %rsi
+; AVX512VL-NEXT:    cmovnel %edi, %r8d
+; AVX512VL-NEXT:    lzcntq %rcx, %rdi
+; AVX512VL-NEXT:    lzcntq %rax, %rax
+; AVX512VL-NEXT:    addl $64, %eax
+; AVX512VL-NEXT:    testq %rcx, %rcx
+; AVX512VL-NEXT:    cmovnel %edi, %eax
+; AVX512VL-NEXT:    subl $-128, %eax
+; AVX512VL-NEXT:    orq %rsi, %rdx
+; AVX512VL-NEXT:    cmovnel %r8d, %eax
+; AVX512VL-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+;
+; AVX512POPCNT-LABEL: vector_ctlz_i256:
+; AVX512POPCNT:       # %bb.0:
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rcx
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rax
+; AVX512POPCNT-NEXT:    vextracti128 $1, %ymm0, %xmm0
+; AVX512POPCNT-NEXT:    vmovq %xmm0, %rdx
+; AVX512POPCNT-NEXT:    vpextrq $1, %xmm0, %rsi
+; AVX512POPCNT-NEXT:    lzcntq %rsi, %rdi
+; AVX512POPCNT-NEXT:    lzcntq %rdx, %r8
+; AVX512POPCNT-NEXT:    addl $64, %r8d
+; AVX512POPCNT-NEXT:    testq %rsi, %rsi
+; AVX512POPCNT-NEXT:    cmovnel %edi, %r8d
+; AVX512POPCNT-NEXT:    lzcntq %rcx, %rdi
+; AVX512POPCNT-NEXT:    lzcntq %rax, %rax
+; AVX512POPCNT-NEXT:    addl $64, %eax
+; AVX512POPCNT-NEXT:    testq %rcx, %rcx
+; AVX512POPCNT-NEXT:    cmovnel %edi, %eax
+; AVX512POPCNT-NEXT:    subl $-128, %eax
+; AVX512POPCNT-NEXT:    orq %rsi, %rdx
+; AVX512POPCNT-NEXT:    cmovnel %r8d, %eax
+; AVX512POPCNT-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX512POPCNT-NEXT:    vzeroupper
+; AVX512POPCNT-NEXT:    retq
+  %a0 = bitcast <8 x i32> %v0 to i256
+  %cnt = call i256 @llvm.ctlz.i256(i256 %a0, i1 0)
+  %res = trunc i256 %cnt to i32
+  ret i32 %res
+}
+
 define i32 @test_ctlz_i512(i512 %a0) nounwind {
 ; SSE-LABEL: test_ctlz_i512:
 ; SSE:       # %bb.0:
@@ -1423,10 +1939,155 @@ define i32 @load_ctlz_i512(ptr %p0) nounwind {
 ; AVX2-NEXT:    popq %r15
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: load_ctlz_i512:
+; AVX512F-LABEL: load_ctlz_i512:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [7,6,5,4,3,2,1,0]
+; AVX512F-NEXT:    vpermq (%rdi), %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; AVX512F-NEXT:    vplzcntq %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
+; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm1 = [512,512,512,512,512,512,5...
[truncated]

@RKSimon RKSimon merged commit 00bccfc into llvm:main Dec 9, 2025
9 of 11 checks passed
@RKSimon RKSimon deleted the x86-bitcnt-vector branch December 10, 2025 08:47
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants