Rust generates unaligned loads when unpacking an aligned struct #54028

GabrielMajeri · 2018-09-07T10:39:21Z

While playing around with some complex numbers (structures with two f64s), I noticed Rust was generating unaligned loads, so I added the #[repr(align(16))] parameter to them. But Rust was still generating unaligned operations.

I redeclared the parameter as a local variable, and it got fixed. It only seems to happen when using a pattern to unpack the fields of an aligned structure.

Minimal repro (also on play.rust-lang.org):

#[repr(align(16))]
pub struct Complex {
    real: f64,
    imag: f64,
}

pub struct Compound {
    a: Complex,
}

pub fn load_unaligned(cmp: Compound) -> f64 {
    let Complex { real, imag } = cmp.a;
    
    (real * real) + (imag * imag)
}

pub fn load_aligned(cmp: Compound) -> f64 {
    let cmp = cmp;
    let Complex { real, imag } = cmp.a;

    (real * real) + (imag * imag)
}

While the functions look nearly the same, the disassembly differs:

playground::load_unaligned:
       ; unaligned loads
	movupd	xmm1, xmmword ptr [rdi]
	mulpd	xmm1, xmm1
	movapd	xmm0, xmm1
	movhlps	xmm0, xmm0
	addsd	xmm0, xmm1
	ret

playground::load_aligned:
       ; this one generates faster, aligned loads:
	movapd	xmm1, xmmword ptr [rdi]
	mulpd	xmm1, xmm1
	movapd	xmm0, xmm1
	movhlps	xmm0, xmm0
	addsd	xmm0, xmm1
	ret

This bug occurs on stable 1.28, beta and nightly.

The text was updated successfully, but these errors were encountered:

cuviper · 2018-09-07T17:58:09Z

In the debug version of the LLVM IR, only the latter shows an explicit align 16. They both have dereferenceable(16) on the argument, but I think that's only size, not alignment.

; playground::load_unaligned
; Function Attrs: uwtable
define double @_ZN10playground14load_unaligned17hb6493993a7b635a6E(%Compound* noalias nocapture dereferenceable(16) %cmp) unnamed_addr #0 !dbg !4 {
start:
  %imag = alloca double, align 8, !dbg !17
  %real = alloca double, align 8, !dbg !17
  call void @llvm.dbg.declare(metadata %Compound* %cmp, metadata !18, metadata !DIExpression()), !dbg !17
  call void @llvm.dbg.declare(metadata double* %real, metadata !19, metadata !DIExpression()), !dbg !21
  call void @llvm.dbg.declare(metadata double* %imag, metadata !22, metadata !DIExpression()), !dbg !21
  %0 = bitcast %Compound* %cmp to %Complex*, !dbg !23
  %1 = bitcast %Complex* %0 to double*, !dbg !23
  %2 = load double, double* %1, align 8, !dbg !23
  store double %2, double* %real, align 8, !dbg !23
  %3 = bitcast %Compound* %cmp to %Complex*, !dbg !23
  %4 = getelementptr inbounds %Complex, %Complex* %3, i32 0, i32 3, !dbg !23
  %5 = load double, double* %4, align 8, !dbg !23
  store double %5, double* %imag, align 8, !dbg !23
  %6 = load double, double* %real, align 8, !dbg !24
  %7 = load double, double* %real, align 8, !dbg !24
  %8 = fmul double %6, %7, !dbg !24
  %9 = load double, double* %imag, align 8, !dbg !24
  %10 = load double, double* %imag, align 8, !dbg !24
  %11 = fmul double %9, %10, !dbg !24
  %12 = fadd double %8, %11, !dbg !24
  ret double %12, !dbg !25
}

; playground::load_aligned
; Function Attrs: uwtable
define double @_ZN10playground12load_aligned17h2a5b875154d5e906E(%Compound* noalias nocapture dereferenceable(16) %cmp) unnamed_addr #0 !dbg !26 {
start:
  %imag = alloca double, align 8, !dbg !27
  %real = alloca double, align 8, !dbg !27
  %_3 = alloca %Compound, align 16, !dbg !27
  %cmp1 = alloca %Compound, align 16, !dbg !27
  call void @llvm.dbg.declare(metadata %Compound* %cmp, metadata !28, metadata !DIExpression()), !dbg !27
  call void @llvm.dbg.declare(metadata %Compound* %cmp1, metadata !29, metadata !DIExpression()), !dbg !31
  call void @llvm.dbg.declare(metadata double* %real, metadata !32, metadata !DIExpression()), !dbg !34
  call void @llvm.dbg.declare(metadata double* %imag, metadata !35, metadata !DIExpression()), !dbg !34
  %0 = bitcast %Compound* %cmp to i8*, !dbg !36
  %1 = bitcast %Compound* %_3 to i8*, !dbg !36
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %0, i64 16, i32 16, i1 false), !dbg !36
  %2 = bitcast %Compound* %_3 to i8*, !dbg !36
  %3 = bitcast %Compound* %cmp1 to i8*, !dbg !36
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %3, i8* %2, i64 16, i32 16, i1 false), !dbg !36
  %4 = bitcast %Compound* %cmp1 to %Complex*, !dbg !37
  %5 = bitcast %Complex* %4 to double*, !dbg !37
  %6 = load double, double* %5, align 8, !dbg !37
  store double %6, double* %real, align 8, !dbg !37
  %7 = bitcast %Compound* %cmp1 to %Complex*, !dbg !37
  %8 = getelementptr inbounds %Complex, %Complex* %7, i32 0, i32 3, !dbg !37
  %9 = load double, double* %8, align 8, !dbg !37
  store double %9, double* %imag, align 8, !dbg !37
  %10 = load double, double* %real, align 8, !dbg !38
  %11 = load double, double* %real, align 8, !dbg !38
  %12 = fmul double %10, %11, !dbg !38
  %13 = load double, double* %imag, align 8, !dbg !38
  %14 = load double, double* %imag, align 8, !dbg !38
  %15 = fmul double %13, %14, !dbg !38
  %16 = fadd double %12, %15, !dbg !38
  ret double %16, !dbg !39
}

GabrielMajeri · 2018-09-08T12:58:44Z

I've been able to get an ever more minimal repro:

#[repr(align(16))]
pub struct Complex {
    real: f64,
    imag: f64,
}

pub fn load_unaligned(z: Complex) -> f64 {
    let Complex { real, imag } = z;
    
    (real * real) + (imag * imag)
}

eddyb · 2018-09-08T13:07:48Z

Does it work as expected if instead of using #[repr(align(16))] you do this?

#[cfg(target_arch = "x86")]
use std::arch::x86::__m128d as f64x2;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::__m128d as f64x2;
pub struct Complex {
    _dummy: [f64x2; 0],
    real: f64,
    imag: f64,
}

If so, then we could use vectors instead of integers for hinting higher alignments to LLVM, for both intra-struct/enum/union padding, and pointers without a LLVM pointee type.

GabrielMajeri · 2018-09-08T13:20:31Z

@eddyb No, unfortunately, still generates unaligned movups (playground link for this version)

eddyb · 2018-09-08T13:52:53Z

I left a comment on #53998, about a potential change that would help here, I think. That is, we can be less conservative, and preserve knowledge of a higher alignment for struct field accesses.

cc @rust-lang/compiler Does anyone have opinions whether we should generate instructions with higher alignment, e.g. load f64 ... align 16, if we statically know that alignment is guaranteed, based on the field offset and the alignment of the struct? (e.g. offset 0 within 16-aligned struct)

nagisa · 2018-09-12T12:25:05Z

Does anyone have opinions

We should strive to provide as much information to LLVM as possible. Telling LLVM about greatest alignment alignment of fields gives it extra information it can work with.

The interesting, and useful, scenario would be something like this:

#[repr(align(16))]
struct Foo {
    // ...
    field: PackedType, // Laid out at the offset 16.
}

#[packed] // so alignment = 1
struct PackedType { ... }

which would allow LLVM to use aligned SSE instructions for loading field.

Fix rust-lang#54028

@eddyb

Rely only on base alignment and offset for computing field alignment Fix #54028 r? @eddyb

cuviper added A-codegen Area: Code generation T-compiler Relevant to the compiler team, which will review and decide on the PR/issue. C-bug Category: This is a bug. labels Sep 7, 2018

eddyb mentioned this issue Sep 8, 2018

rustc_codegen_llvm: don't assume offsets are always aligned. #53998

Merged

eddyb removed the C-bug Category: This is a bug. label Sep 8, 2018

AstralSorceress added a commit to AstralSorceress/rust that referenced this issue Sep 25, 2018

Rely only on base alignment and offset for computing field alignment

06b8c3e

Fix rust-lang#54028

AstralSorceress mentioned this issue Sep 25, 2018

Rely only on base alignment and offset for computing field alignment #54547

Merged

bors added a commit that referenced this issue Sep 28, 2018

Auto merge of #54547 - AstralSorcerer:issue-54028, r=eddyb

c222479

Rely only on base alignment and offset for computing field alignment Fix #54028 r? @eddyb

bors closed this as completed in #54547 Sep 28, 2018

RalfJung mentioned this issue Jul 28, 2023

Meeting proposal: When accessing a field that is at a "very aligned" offset, can the required alignment be higher than the type of the field indicates? rust-lang/opsem-team#11

Closed

RalfJung mentioned this issue Aug 25, 2023

replace 'UB on raw ptr deref' with UB on place projection/access rust-lang/reference#1387

Merged

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Rust generates unaligned loads when unpacking an aligned struct #54028

Rust generates unaligned loads when unpacking an aligned struct #54028

GabrielMajeri commented Sep 7, 2018 •

edited

Loading

cuviper commented Sep 7, 2018

GabrielMajeri commented Sep 8, 2018

eddyb commented Sep 8, 2018

GabrielMajeri commented Sep 8, 2018

eddyb commented Sep 8, 2018 •

edited

Loading

nagisa commented Sep 12, 2018

Rust generates unaligned loads when unpacking an aligned struct #54028

Rust generates unaligned loads when unpacking an aligned struct #54028

Comments

GabrielMajeri commented Sep 7, 2018 • edited Loading

cuviper commented Sep 7, 2018

GabrielMajeri commented Sep 8, 2018

eddyb commented Sep 8, 2018

GabrielMajeri commented Sep 8, 2018

eddyb commented Sep 8, 2018 • edited Loading

nagisa commented Sep 12, 2018

GabrielMajeri commented Sep 7, 2018 •

edited

Loading

eddyb commented Sep 8, 2018 •

edited

Loading