-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Open
Labels
Description
Noticed when playing with _BitInt() for very large integers
https://zig.godbolt.org/z/x1qMfc3G7
define void @and_i512_mem(ptr %p0, ptr %p1, ptr %p2) {
%a0 = load i512, ptr %p0
%a1 = load i512, ptr %p1
%res = and i512 %a0, %a1
store i512 %res, ptr %p2
ret void
}
and_i512_mem: # @and_i512_mem
pushq %rbx
movq 48(%rdi), %rax
movq 56(%rdi), %rcx
movq 32(%rdi), %r8
movq 40(%rdi), %r9
movq 16(%rdi), %r10
movq 24(%rdi), %r11
movq (%rdi), %rbx
movq 8(%rdi), %rdi
andq 8(%rsi), %rdi
andq (%rsi), %rbx
andq 24(%rsi), %r11
andq 16(%rsi), %r10
andq 40(%rsi), %r9
andq 32(%rsi), %r8
andq 56(%rsi), %rcx
andq 48(%rsi), %rax
movq %rax, 48(%rdx)
movq %rcx, 56(%rdx)
movq %r8, 32(%rdx)
movq %r9, 40(%rdx)
movq %r10, 16(%rdx)
movq %r11, 24(%rdx)
movq %rbx, (%rdx)
movq %rdi, 8(%rdx)
popq %rbx
retq
On an AVX-512 target, this could instead be a simple ZMM load, folded AND, and store sequence (or a split XMM/YMM sequence on SSE/AVX1 targets).
We could attempt this in SLP/vectorcombine or leave it to the DAG and handle it prior to type legalisation.
For cases where some of the arguments are passed in registers, we'd need to either decide whether the BUILD_VECTOR is worth it, or check whether there's a shadow stack variable we can load from instead:
; Bitwise AND of two i512 values that are passed and returned by value;
; unlike a load/and/store reproducer, there are no explicit memory operands here.
define i512 @and_i512(i512 %a0, i512 %a1) {
%res = and i512 %a0, %a1
ret i512 %res
}