Skip to content

Commit

Permalink
Fix ABI compliance when passing structs around
Browse files Browse the repository at this point in the history
When accepting or returning structs, the data must be copied using
memcpy into a temporary slot, this slot must then be loaded into the
final slot. Without this, LLVM appears to generate the wrong code on
platforms such as ARM64.

This fixes #792.
  • Loading branch information
yorickpeterse committed Jan 10, 2025
1 parent 71a9d23 commit 4870f13
Show file tree
Hide file tree
Showing 5 changed files with 280 additions and 29 deletions.
25 changes: 18 additions & 7 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,12 @@ jobs:
key: amd64-linux-gnu-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }}
- name: Run compiler tests
run: cargo test
# We run tests with and without optimizations, such that we can catch any
# potential miscompilations introduced by optimizations. We only do this
# for this particular target as our optimizations aren't target specific.
- name: Run stdlib tests with optimizations
- name: Run stdlib tests with default optimizations
run: 'cd std && cargo run -- test'
- name: Run stdlib tests without optimizations
run: 'cd std && cargo run -- test --opt=none'
- name: Run stdlib tests with aggressive optimizations
run: 'cd std && cargo run -- test --opt=aggressive'

amd64-linux-musl:
timeout-minutes: 15
Expand All @@ -97,8 +96,12 @@ jobs:
key: amd64-linux-musl-${{ hashFiles('Cargo.lock', 'rust-toolchain.toml') }}
- name: Run compiler tests
run: cargo test
- name: Run stdlib tests
- name: Run stdlib tests with default optimizations
run: 'cd std && cargo run -- test'
- name: Run stdlib tests without optimizations
run: 'cd std && cargo run -- test --opt=none'
- name: Run stdlib tests with aggressive optimizations
run: 'cd std && cargo run -- test --opt=aggressive'

amd64-mac-native:
timeout-minutes: 15
Expand All @@ -117,8 +120,12 @@ jobs:
run: ./ci/mac.sh
- name: Run compiler tests
run: cargo test
- name: Run stdlib tests
- name: Run stdlib tests with default optimizations
run: 'cd std && cargo run -- test'
- name: Run stdlib tests without optimizations
run: 'cd std && cargo run -- test --opt=none'
- name: Run stdlib tests with aggressive optimizations
run: 'cd std && cargo run -- test --opt=aggressive'

arm64-mac-native:
timeout-minutes: 15
Expand All @@ -137,8 +144,12 @@ jobs:
run: ./ci/mac.sh
- name: Run compiler tests
run: cargo test
- name: Run stdlib tests
- name: Run stdlib tests with default optimizations
run: 'cd std && cargo run -- test'
- name: Run stdlib tests without optimizations
run: 'cd std && cargo run -- test --opt=none'
- name: Run stdlib tests with aggressive optimizations
run: 'cd std && cargo run -- test --opt=aggressive'

amd64-freebsd-native:
timeout-minutes: 15
Expand Down
43 changes: 42 additions & 1 deletion compiler/src/llvm/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@ use inkwell::debug_info::{
DWARFSourceLanguage, DebugInfoBuilder,
};
use inkwell::module::{FlagBehavior, Module as InkwellModule};
use inkwell::types::{ArrayType, BasicType, FunctionType, StructType};
use inkwell::targets::TargetData;
use inkwell::types::{
ArrayType, BasicType, BasicTypeEnum, FunctionType, StructType,
};
use inkwell::values::{
AggregateValue, ArrayValue, BasicMetadataValueEnum, BasicValue,
BasicValueEnum, CallSiteValue, FloatValue, FunctionValue,
Expand Down Expand Up @@ -152,6 +155,44 @@ impl<'ctx> Builder<'ctx> {
self.store(field_ptr, value);
}

pub(crate) fn memcpy(
&self,
target_data: &TargetData,
from: PointerValue<'ctx>,
from_type: BasicTypeEnum<'ctx>,
to: PointerValue<'ctx>,
to_type: BasicTypeEnum<'ctx>,
) {
let len = self.u64_literal(target_data.get_abi_size(&to_type));
let from_align = target_data.get_abi_alignment(&from_type);
let to_align = target_data.get_abi_alignment(&to_type);

self.inner.build_memcpy(to, to_align, from, from_align, len).unwrap();
}

/// Copies a struct to a stack slot using memcpy.
///
/// When passing structs as arguments or returning them, their types may
/// differ from the destination type. In addition, the ABI may require that
/// the data be copied explicitly instead of being passed as-is.
///
/// Using this method we can easily handle these cases and leave it up to
/// LLVM to optimize and generate the correct code. The approach taken here
/// is similar to what clang and Rust do.
pub(crate) fn copy_struct(
&self,
target_data: &TargetData,
from: BasicValueEnum<'ctx>,
to: PointerValue<'ctx>,
to_type: BasicTypeEnum<'ctx>,
) {
let from_type = from.get_type();
let tmp = self.new_stack_slot(from_type);

self.store(tmp, from);
self.memcpy(target_data, tmp, from_type, to, to_type);
}

pub(crate) fn store<V: BasicValue<'ctx>>(
&self,
variable: PointerValue<'ctx>,
Expand Down
188 changes: 176 additions & 12 deletions compiler/src/llvm/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use inkwell::attributes::Attribute;
use inkwell::basic_block::BasicBlock;
use inkwell::builder::Builder;
use inkwell::module::Module;
use inkwell::targets::TargetData;
use inkwell::types::{
AnyTypeEnum, ArrayType, BasicType, BasicTypeEnum, FloatType, IntType,
PointerType, StructType, VoidType,
Expand All @@ -18,6 +19,98 @@ use types::{
FLOAT_ID, INT_ID, NIL_ID,
};

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum Class {
Int(u64),
Float(u64),
}

impl Class {
fn to_llvm_type<'c>(self, context: &'c Context) -> BasicTypeEnum<'c> {
match self {
Class::Int(bytes) => {
context.custom_int(bytes as u32 * 8).as_basic_type_enum()
}
Class::Float(4) => context.f32_type().as_basic_type_enum(),
Class::Float(_) => context.f64_type().as_basic_type_enum(),
}
}
}

fn classify<'a>(
target_data: &TargetData,
typ: BasicTypeEnum<'a>,
classes: &mut Vec<Class>,
) {
match typ {
BasicTypeEnum::StructType(t) => {
for field in t.get_field_types_iter() {
classify(target_data, field, classes);
}
}
BasicTypeEnum::ArrayType(t) => {
let field = t.get_element_type();

for _ in 0..t.len() {
classify(target_data, field, classes);
}
}
BasicTypeEnum::FloatType(t) => {
classes.push(Class::Float(target_data.get_abi_size(&t)))
}
BasicTypeEnum::IntType(t) => {
classes.push(Class::Int(target_data.get_abi_size(&t)))
}
BasicTypeEnum::PointerType(t) => {
classes.push(Class::Int(target_data.get_abi_size(&t)))
}
BasicTypeEnum::VectorType(_) => {
panic!("vector types are not yet supported")
}
}
}

fn classify_struct<'a>(
target_data: &TargetData,
typ: StructType<'a>,
) -> (Class, Class) {
let mut classes = Vec::new();
let align = target_data.get_abi_alignment(&typ) as u64;

classify(target_data, typ.as_basic_type_enum(), &mut classes);
combine(classes, align)
}

fn combine(classes: Vec<Class>, align: u64) -> (Class, Class) {
let mut a = 0;
let mut a_float = true;
let mut b = 0;
let mut b_float = true;

for cls in classes {
match cls {
Class::Int(v) if a + v <= 8 => {
a += v;
a_float = false;
}
Class::Float(v) if a + v <= 8 => a += v,
Class::Int(v) => {
b += v;
b_float = false;
}
Class::Float(v) => b += v,
}
}

a = max(a, align);
b = max(b, align);

(
if a_float { Class::Float(a) } else { Class::Int(a) },
if b_float { Class::Float(b) } else { Class::Int(b) },
)
}

fn size_in_bits(bytes: u32) -> u32 {
// LLVM crashes when passing/returning zero sized types (e.g. structs). In
// addition, such types aren't useful in Inko, so we enforce a minimum size
Expand Down Expand Up @@ -228,15 +321,10 @@ impl Context {

ArgumentType::Regular(bits.as_basic_type_enum())
} else if bytes <= 16 {
// The AMD64 ABI doesn't handle types such as
// `{ i16, i64 }`. While it does handle `{ i64, i16 }`, this
// requires re-ordering the fields and their corresponding
// access sites.
//
// To avoid the complexity of that we take the same approach
// as Rust: if the struct is larger than 8 bytes, we turn it
// into two 64 bits values.
ArgumentType::Regular(self.two_words().as_basic_type_enum())
ArgumentType::Regular(
self.small_amd64_struct(layouts, typ)
.as_basic_type_enum(),
)
} else {
ArgumentType::StructValue(typ)
}
Expand All @@ -245,6 +333,8 @@ impl Context {
if bytes <= 8 {
ArgumentType::Regular(self.i64_type().as_basic_type_enum())
} else if bytes <= 16 {
// TODO: handle f32x4 and the likes, even when larger than
// 16 bytes
ArgumentType::Regular(self.two_words().as_basic_type_enum())
} else {
// clang and Rust don't use "byval" for ARM64 when the
Expand Down Expand Up @@ -287,9 +377,21 @@ impl Context {
let bytes = layouts.target_data.get_abi_size(&typ) as u32;

match state.config.target.arch {
// For both AMD64 and ARM64 the way structs are returned is the
// same. For more details, refer to argument_type().
Architecture::Amd64 | Architecture::Arm64 => {
Architecture::Amd64 => {
if bytes <= 8 {
let bits = self.custom_int(size_in_bits(bytes));

ReturnType::Regular(bits.as_basic_type_enum())
} else if bytes <= 16 {
ReturnType::Regular(
self.small_amd64_struct(layouts, typ)
.as_basic_type_enum(),
)
} else {
ReturnType::Struct(typ)
}
}
Architecture::Arm64 => {
if bytes <= 8 {
let bits = self.custom_int(size_in_bits(bytes));

Expand All @@ -302,6 +404,16 @@ impl Context {
}
}
}

fn small_amd64_struct<'ctx>(
&'ctx self,
layouts: &Layouts<'ctx>,
typ: StructType<'ctx>,
) -> StructType<'ctx> {
let (a, b) = classify_struct(layouts.target_data, typ);

self.struct_type(&[a.to_llvm_type(self), b.to_llvm_type(self)])
}
}

#[cfg(test)]
Expand All @@ -318,4 +430,56 @@ mod tests {
assert_eq!(ctx.rust_string_type().len(), 24);
assert_eq!(ctx.rust_vec_type().len(), 24);
}

#[test]
fn test_combine() {
assert_eq!(
combine(vec![Class::Float(4), Class::Float(4)], 4),
(Class::Float(8), Class::Float(4))
);
assert_eq!(
combine(vec![Class::Float(4), Class::Float(4), Class::Int(4)], 4),
(Class::Float(8), Class::Int(4))
);
assert_eq!(
combine(
vec![
Class::Float(4),
Class::Float(4),
Class::Int(4),
Class::Int(4)
],
4
),
(Class::Float(8), Class::Int(8))
);
assert_eq!(
combine(
vec![
Class::Float(4),
Class::Int(4),
Class::Int(4),
Class::Int(4)
],
4
),
(Class::Int(8), Class::Int(8))
);
assert_eq!(
combine(
vec![
Class::Int(4),
Class::Float(4),
Class::Int(4),
Class::Int(4)
],
4
),
(Class::Int(8), Class::Int(8))
);
assert_eq!(
combine(vec![Class::Int(1), Class::Int(4), Class::Int(8)], 8),
(Class::Int(8), Class::Int(8))
);
}
}
21 changes: 20 additions & 1 deletion compiler/src/llvm/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1013,6 +1013,13 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> {
let val = self.builder.load(typ, arg.into_pointer_value());

self.builder.store(var, val);
} else if arg.is_struct_value() {
self.builder.copy_struct(
self.layouts.target_data,
arg,
var,
typ,
);
} else {
self.builder.store(var, arg);
}
Expand Down Expand Up @@ -1805,6 +1812,8 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> {
// For example, if the struct is `{ i64 }` we may
// in fact return a value of type `i64`. While both have the
// same layout, they're not compatible at the LLVM level.
//
// TODO: use memcpy when returning structs
let typ = self
.builder
.function
Expand Down Expand Up @@ -2753,6 +2762,7 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> {
}

let reg_var = self.variables[&register];
let reg_typ = self.variable_types[&register];
let call_site = match kind {
CallKind::Direct(f) => self.builder.direct_call(f, &args),
CallKind::Indirect(t, f) => self.builder.indirect_call(t, f, &args),
Expand All @@ -2765,7 +2775,16 @@ impl<'shared, 'module, 'ctx> LowerMethod<'shared, 'module, 'ctx> {
if layout.returns.is_regular() {
let val = call_site.try_as_basic_value().left().unwrap();

self.builder.store(reg_var, val);
if layout.returns.is_struct() {
self.builder.copy_struct(
self.layouts.target_data,
val,
reg_var,
reg_typ,
);
} else {
self.builder.store(reg_var, val);
}
} else if let Some((typ, tmp)) = sret {
let val = self.builder.load(typ, tmp);

Expand Down
Loading

0 comments on commit 4870f13

Please sign in to comment.