[AArch64] Miscompilation of struct containing complex double and BitInt #89230

ostannard · 2024-04-18T13:18:52Z

This code is miscompiled when targeting AArch64 at all optimisation levels:

extern void __aeabi_assert(const char *, const char *, int);
#define assert(e) ((e) ? (void)0 : __aeabi_assert(#e, __FILE__, __LINE__))
#include <complex.h>

union S131 {
  double _Complex M0 __attribute__((aligned(16)));
  signed _BitInt(124) M1;
};

void F94(union S131 P3) {
  assert(P3.M0 == 1.0 + 2.0 * _Complex_I);
}

int main() {
  union S131 P3 = {1.0 + 2.0 * _Complex_I};
  F94(P3);

  return 0;
}

I think that the IR generated by clang is wrong, before any optimisations:

$ /work/llvm/build/bin/clang --target=aarch64--none-elf -march=armv8-a -c test.c -o - -S -O0 -emit-llvm
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-none-elf"

%union.S131 = type { i124 }

@.str = private unnamed_addr constant [21 x i8] c"P3.M0 == 1.0 + 2.0if\00", align 1
@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
@__const.main.P3 = private unnamed_addr constant { { double, double } } { { double, double } { double 1.000000e+00, double 2.000000e+00 } }, align 16

; Function Attrs: noinline nounwind optnone
define dso_local void @F94(i128 %P3.coerce) #0 {
entry:
  %P3 = alloca %union.S131, align 16
  %coerce.dive = getelementptr inbounds %union.S131, ptr %P3, i32 0, i32 0
  %coerce.val.ii = trunc i128 %P3.coerce to i124
  store i124 %coerce.val.ii, ptr %coerce.dive, align 16
  %P3.realp = getelementptr inbounds { double, double }, ptr %P3, i32 0, i32 0
  %P3.real = load double, ptr %P3.realp, align 16
  %P3.imagp = getelementptr inbounds { double, double }, ptr %P3, i32 0, i32 1
  %P3.imag = load double, ptr %P3.imagp, align 8
  %cmp.r = fcmp oeq double %P3.real, 1.000000e+00
  %cmp.i = fcmp oeq double %P3.imag, 2.000000e+00
  %and.ri = and i1 %cmp.r, %cmp.i
  br i1 %and.ri, label %cond.true, label %cond.false

cond.true:                                        ; preds = %entry
  br label %cond.end

cond.false:                                       ; preds = %entry
  call void @__aeabi_assert(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 10)
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  ret void
}

declare dso_local void @__aeabi_assert(ptr noundef, ptr noundef, i32 noundef) #1

; Function Attrs: noinline nounwind optnone
define dso_local i32 @main() #0 {
entry:
  %retval = alloca i32, align 4
  %P3 = alloca %union.S131, align 16
  store i32 0, ptr %retval, align 4
  call void @llvm.memcpy.p0.p0.i64(ptr align 16 %P3, ptr align 16 @__const.main.P3, i64 16, i1 false)
  %coerce.dive = getelementptr inbounds %union.S131, ptr %P3, i32 0, i32 0
  %0 = load i124, ptr %coerce.dive, align 16
  %coerce.val.ii = zext i124 %0 to i128
  call void @F94(i128 %coerce.val.ii)
  ret i32 0
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #2

attributes #0 = { noinline nounwind optnone "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a" }
attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a" }
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 1}
!2 = !{!"clang version 19.0.0git (git@github.com:llvm/llvm-project.git 743d090b96e09fe7c2cea60a8962f579684c37ce)"}

This is using %union.S131, which is a 124-bit type, to store the value of P3, but it needs 128 bits of storage to hold the double _Complex value. The memcpy in main copies 16 bytes into an allocation which is only 15 bytes.

The text was updated successfully, but these errors were encountered:

llvmbot · 2024-04-18T13:19:10Z

@llvm/issue-subscribers-backend-aarch64

Author: None (ostannard)

This code is miscompiled when targeting AArch64 at all optimisation levels:

extern void __aeabi_assert(const char *, const char *, int);
#define assert(e) ((e) ? (void)0 : __aeabi_assert(#e, __FILE__, __LINE__))
#include &lt;complex.h&gt;

union S131 {
  double _Complex M0 __attribute__((aligned(16)));
  signed _BitInt(124) M1;
};

void F94(union S131 P3) {
  assert(P3.M0 == 1.0 + 2.0 * _Complex_I);
}

int main() {
  union S131 P3 = {1.0 + 2.0 * _Complex_I};
  F94(P3);

  return 0;
}

I think that the IR generated by clang is wrong, before any optimisations:

$ /work/llvm/build/bin/clang --target=aarch64--none-elf -march=armv8-a -c test.c -o - -S -O0 -emit-llvm
; ModuleID = 'test.c'
source_filename = "test.c"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-none-elf"

%union.S131 = type { i124 }

@.str = private unnamed_addr constant [21 x i8] c"P3.M0 == 1.0 + 2.0if\00", align 1
@.str.1 = private unnamed_addr constant [7 x i8] c"test.c\00", align 1
@<!-- -->__const.main.P3 = private unnamed_addr constant { { double, double } } { { double, double } { double 1.000000e+00, double 2.000000e+00 } }, align 16

; Function Attrs: noinline nounwind optnone
define dso_local void @<!-- -->F94(i128 %P3.coerce) #<!-- -->0 {
entry:
  %P3 = alloca %union.S131, align 16
  %coerce.dive = getelementptr inbounds %union.S131, ptr %P3, i32 0, i32 0
  %coerce.val.ii = trunc i128 %P3.coerce to i124
  store i124 %coerce.val.ii, ptr %coerce.dive, align 16
  %P3.realp = getelementptr inbounds { double, double }, ptr %P3, i32 0, i32 0
  %P3.real = load double, ptr %P3.realp, align 16
  %P3.imagp = getelementptr inbounds { double, double }, ptr %P3, i32 0, i32 1
  %P3.imag = load double, ptr %P3.imagp, align 8
  %cmp.r = fcmp oeq double %P3.real, 1.000000e+00
  %cmp.i = fcmp oeq double %P3.imag, 2.000000e+00
  %and.ri = and i1 %cmp.r, %cmp.i
  br i1 %and.ri, label %cond.true, label %cond.false

cond.true:                                        ; preds = %entry
  br label %cond.end

cond.false:                                       ; preds = %entry
  call void @<!-- -->__aeabi_assert(ptr noundef @.str, ptr noundef @.str.1, i32 noundef 10)
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  ret void
}

declare dso_local void @<!-- -->__aeabi_assert(ptr noundef, ptr noundef, i32 noundef) #<!-- -->1

; Function Attrs: noinline nounwind optnone
define dso_local i32 @<!-- -->main() #<!-- -->0 {
entry:
  %retval = alloca i32, align 4
  %P3 = alloca %union.S131, align 16
  store i32 0, ptr %retval, align 4
  call void @<!-- -->llvm.memcpy.p0.p0.i64(ptr align 16 %P3, ptr align 16 @<!-- -->__const.main.P3, i64 16, i1 false)
  %coerce.dive = getelementptr inbounds %union.S131, ptr %P3, i32 0, i32 0
  %0 = load i124, ptr %coerce.dive, align 16
  %coerce.val.ii = zext i124 %0 to i128
  call void @<!-- -->F94(i128 %coerce.val.ii)
  ret i32 0
}

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @<!-- -->llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #<!-- -->2

attributes #<!-- -->0 = { noinline nounwind optnone "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a" }
attributes #<!-- -->1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+v8a" }
attributes #<!-- -->2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 7, !"frame-pointer", i32 1}
!2 = !{!"clang version 19.0.0git (git@<!-- -->github.com:llvm/llvm-project.git 743d090b96e09fe7c2cea60a8962f579684c37ce)"}

This is using %union.S131, which is a 124-bit type, to store the value of P3, but it needs 128 bits of storage to hold the double _Complex value. The memcpy in main copies 16 bytes into an allocation which is only 15 bytes.

dc03-work · 2024-04-18T14:26:37Z

This seems reproducible on x86 clang as well, so I don't think its a backend issue: https://godbolt.org/z/8c8deKPzq.

efriedma-quic · 2024-04-19T22:43:51Z

There's no such thing as an allocation of 124 bits. An alloca's size is always a number in bytes, that size is always a multiple of the type's alignment, and it's legal to read and write the padding.

So the allocation is 128 bits, i.e. 16 bytes, and the memcpy is fine.

(It's possible there's some optimization in LLVM that doesn't handle this construct correctly, but that would be a separate issue. See also #64081.)

Fznamznon · 2024-07-22T11:10:47Z

Note, after #91364 it is allocating i128 https://godbolt.org/z/zfxvPEf3o .

ostannard added backend:AArch64 c23 miscompilation labels Apr 18, 2024

cor3ntin closed this as completed Aug 7, 2024

ostannard added the generated by fuzzer label Mar 21, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AArch64] Miscompilation of struct containing complex double and BitInt #89230

[AArch64] Miscompilation of struct containing complex double and BitInt #89230

ostannard commented Apr 18, 2024

llvmbot commented Apr 18, 2024

dc03-work commented Apr 18, 2024

efriedma-quic commented Apr 19, 2024

Fznamznon commented Jul 22, 2024

[AArch64] Miscompilation of struct containing complex double and BitInt #89230

[AArch64] Miscompilation of struct containing complex double and BitInt #89230

Comments

ostannard commented Apr 18, 2024

llvmbot commented Apr 18, 2024

dc03-work commented Apr 18, 2024

efriedma-quic commented Apr 19, 2024

Fznamznon commented Jul 22, 2024