Skip to content

Commit

Permalink
Implement AArch64 ABI
Browse files Browse the repository at this point in the history
  • Loading branch information
yuyichao committed Jan 1, 2016
1 parent f8c6aac commit b72c604
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 10 deletions.
282 changes: 282 additions & 0 deletions src/abi_aarch64.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,282 @@
// This file is a part of Julia. License is MIT: http://julialang.org/license

//===----------------------------------------------------------------------===//
//
// The ABI implementation used for AArch64 targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
//
//===----------------------------------------------------------------------===//

namespace {

// This ABI carries no per-signature classification state; a plain `bool`
// stands in for the state type.
typedef bool AbiState;
// Initial value handed out for a fresh `AbiState`.
static const AbiState default_abi_state = false;

// Map a Julia floating-point scalar type to the LLVM floating-point type of
// the same width.
//
// Returns `T_float16`, `T_float32`, `T_float64` or `T_float128` when `dt` is
// an immutable type without fields, is 2, 4, 8 or 16 bytes in size and
// satisfies `jl_is_floattype`. Returns NULL for everything else.
static Type *get_llvm_fptype(jl_datatype_t *dt)
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    // A floating-point scalar is immutable and has no fields. The field
    // count can never be negative, so the test has to be "any fields at all".
    if (dt->mutabl || jl_datatype_nfields(dt) != 0)
        return NULL;
    Type *lltype;
    // Check size first since it's cheaper.
    switch (dt->size) {
    case 2:
        lltype = T_float16;
        break;
    case 4:
        lltype = T_float32;
        break;
    case 8:
        lltype = T_float64;
        break;
    case 16:
        lltype = T_float128;
        break;
    default:
        return NULL;
    }
    // The size alone does not make it a float (e.g. integers share these
    // sizes), so confirm with the type check before returning.
    return jl_is_floattype(dt) ? lltype : NULL;
}

// Test whether a type is a homogeneous floating-point aggregate (HFA) or a
// homogeneous short-vector aggregate (HVA).
// Returns the number of members if it is one and 0 otherwise.
// Only aggregates whose members are accepted by `get_llvm_fptype` are
// recognized; short-vector members are not handled.
static size_t isHFAorHVA(jl_datatype_t *dt)
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)

    // From the Procedure Call Standard:
    //   An HFA is a Homogeneous Aggregate with a Fundamental Data Type that
    //   is a Floating-Point type and at most four uniquely addressable
    //   members.
    //   An HVA is a Homogeneous Aggregate with a Fundamental Data Type that
    //   is a Short-Vector type and at most four uniquely addressable members.
    const size_t nfields = jl_datatype_nfields(dt);
    if (nfields == 0 || nfields > 4)
        return 0;

    // With at least one member present, the first field fixes the element
    // type: it has to be a floating-point scalar ...
    jl_value_t *const first = jl_field_type(dt, 0);
    if (get_llvm_fptype((jl_datatype_t*)first) == NULL)
        return 0;

    // ... and every remaining field has to be of that very same type.
    size_t idx = 1;
    while (idx < nfields && jl_field_type(dt, idx) == first)
        idx++;
    return idx == nfields ? nfields : 0;
}

// Decide whether an argument of type `ty` is replaced by a pointer to a
// caller-allocated copy (rules B.2 - B.4 of the Procedure Call Standard).
//
// `*byRef` is written only when `ty` is not an HFA/HVA; in that case it is
// set to whether the type is larger than 16 bytes. The state argument and
// the trailing flag are not used.
void needPassByRef(AbiState*, jl_value_t *ty, bool *byRef, bool*)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    jl_datatype_t *const dtype = (jl_datatype_t*)ty;

    // B.2
    //   An HFA or HVA argument is used unmodified, so `*byRef` keeps whatever
    //   value the caller put there.
    // B.3
    //   A Composite Type larger than 16 bytes is copied to memory allocated
    //   by the caller and the argument is replaced by a pointer to the copy.
    //   Only the total size is tested, not whether the type is composite:
    //   a large bitstype has no corresponding C type and is simply treated
    //   as a composite type of the right size.
    if (!isHFAorHVA(dtype)) {
        *byRef = dtype->size > 16;
    }

    // B.4
    //   The size of a Composite Type argument is rounded up to the nearest
    //   multiple of 8 bytes. Nothing to do for that here.
}

// Always returns false; both the type and the flag argument are ignored.
// NOTE(review): presumably this tells the ccall code generator that no
// private copy of an argument is needed on AArch64 -- confirm with the caller.
bool need_private_copy(jl_value_t*, bool)
{
return false;
}

// Classify how an argument of Julia type `ty` is passed, following section
// 5.4 stage C of the Procedure Call Standard.
//
// Out parameters. Callers must initialize all of them to `false`; this
// function only ever stores `true` into them.
//   fpreg        - the argument is passed in SIMD and floating-point
//                  registers.
//   onstack      - the argument is larger than 16 bytes and has to be passed
//                  on stack (including by reference). For a return value this
//                  means sret has to be used.
//   need_rewrite - the argument type may need to be rewritten:
//                  to [n x ftype] when `fpreg` is also set,
//                  to [n x i64] when it goes in general purpose registers.
static void classify_arg(jl_value_t *ty, bool *fpreg, bool *onstack,
                         bool *need_rewrite)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    jl_datatype_t *const dtype = (jl_datatype_t*)ty;

    // C.1
    //   A Half-, Single-, Double- or Quad-precision Floating-point or Short
    //   Vector Type is allocated to the least significant bits of v[NSRN]
    //   when NSRN is less than 8.
    //   Short vector types are not covered because we do not have them.
    if (get_llvm_fptype(dtype) != NULL) {
        *fpreg = true;
        return;
    }

    // C.2
    //   An HFA or HVA is allocated to SIMD and Floating-point registers, one
    //   register per member, when enough of them are unallocated
    //   (NSRN + number of members <= 8).
    //   HFA and HVA have <= 4 members.
    if (isHFAorHVA(dtype) != 0) {
        *fpreg = true;
        *need_rewrite = true;
        return;
    }

    // Pass-by-reference check. Strictly this belongs before stage C, but
    // doing it here avoids testing for HFA/HVA twice. As in `needPassByRef`,
    // only the size is tested, not whether the type is composite.
    if (dtype->size > 16) {
        *onstack = true;
        return;
    }

    // C.3 - C.6
    //   These rules describe how HFA, HVA and floating-point arguments that
    //   did not get registers are rounded, aligned and copied to memory at
    //   the NSAA. They need no separate handling: such types already
    //   returned in the C.1 / C.2 cases above.

    // C.7
    //   An Integral or Pointer Type of at most 8 bytes is copied to the
    //   least significant bits of x[NGRN] when NGRN is less than 8.
    // C.8
    //   An argument with an alignment of 16 rounds the NGRN up to the next
    //   even number.
    // C.9
    //   An Integral Type of 16 bytes is copied to x[NGRN] and x[NGRN+1] when
    //   NGRN is less than 7, lower addressed double-word first.
    //
    // C.7 and C.9 are handled together: any immutable type without fields
    // and with a size of 1, 2, 4, 8 or 16 bytes is treated as an integer or
    // pointer and passed as is. This is needed for types like Cstring, which
    // should be treated as pointers. Floating point types cannot reach this
    // point since they are handled above.
    if (jl_is_immutable(dtype) && jl_datatype_nfields(dtype) == 0) {
        switch (dtype->size) {
        case 1:
        case 2:
        case 4:
        case 8:
        case 16:
            return;
        default:
            break;
        }
    }

    // C.10
    //   A Composite Type whose size in double-words is not more than
    //   8 minus NGRN is copied into consecutive general-purpose registers
    //   starting at x[NGRN], as though loaded from a double-word-aligned
    //   address with a sequence of LDR instructions (the contents of unused
    //   parts of the registers are unspecified).
    //
    // Whether the type really is composite is not checked: the types with a
    // corresponding C type were dealt with above, and whatever is left (for
    // instance a bitstype of unusual size) is treated as a black box
    // composite. Its size is at most 16 bytes, i.e. at most two double-words,
    // because larger types already returned through the `onstack` case.
    *need_rewrite = true;

    // C.11 - C.15
    //   The remaining rules set the NGRN to 8, round up the NSAA and copy the
    //   argument to memory when it did not fit into registers (padding
    //   arguments smaller than 8 bytes to 8 bytes). Nothing more is needed
    //   here: the rewrite requested for C.10 covers these cases as well.
}

// Tell whether a function returning `ty` has to use sret.
//
// Section 5.5 of the Procedure Call Standard: if the type, T, of the result
// of a function is such that
//
//     void func(T arg)
//
// would require that arg be passed as a value in a register (or set of
// registers) according to the rules in section 5.4 Parameter Passing, then
// the result is returned in the same registers as would be used for such an
// argument.
//
// The return type is therefore classified exactly like an argument and sret
// is used when that classification says "on stack". The state argument is
// not used.
bool use_sret(AbiState*, jl_value_t *ty)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    bool in_fpreg = false;
    bool on_stack = false;
    bool rewrite = false;
    // Only the stack flag matters here; the other two results are ignored.
    classify_arg(ty, &in_fpreg, &on_stack, &rewrite);
    return on_stack;
}

// Pick the LLVM type that a value of Julia type `ty` should be passed as.
//
// Returns
//   - NULL if `ty` is not a datatype, is abstract, or needs no rewrite
//     according to `classify_arg`;
//   - the matching LLVM floating-point type for a floating-point scalar;
//   - [n x fptype], n being the number of fields, for an HFA/HVA;
//   - [n x i64], n being the size in double-words, for everything else that
//     needs a rewrite.
// The trailing flag argument is not used.
Type *preferred_llvm_type(jl_value_t *ty, bool)
{
    if (!jl_is_datatype(ty) || jl_is_abstracttype(ty))
        return NULL;
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    // Floating-point scalars map directly to their LLVM counterpart.
    Type *scalar_fp = get_llvm_fptype(dt);
    if (scalar_fp != NULL)
        return scalar_fp;

    bool in_fpreg = false;
    bool on_stack = false;
    bool rewrite = false;
    classify_arg(ty, &in_fpreg, &on_stack, &rewrite);
    if (!rewrite)
        return NULL;

    if (!in_fpreg) {
        // General purpose registers: one i64 per (started) double-word.
        assert(dt->size <= 16); // Should be pass by reference otherwise
        return ArrayType::get(T_int64, (dt->size + 7) >> 3);
    }

    // Floating-point registers together with a rewrite only happens for
    // isHFAorHVA, so all fields share the type of the first one.
    size_t members = jl_datatype_nfields(dt);
    assert(members > 0 && members <= 4);
    jl_datatype_t *field0 = (jl_datatype_t*)jl_field_type(dt, 0);
    return ArrayType::get(get_llvm_fptype(field0), members);
}

}
6 changes: 6 additions & 0 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ static Value *runtime_sym_lookup(PointerType *funcptype, const char *f_lib, cons
# else
# include "abi_x86.cpp"
# endif
#elif defined _CPU_AARCH64_
# include "abi_aarch64.cpp"
#else
# warning "ccall is defaulting to llvm ABI, since no platform ABI has been defined for this CPU/OS combination"
# include "abi_llvm.cpp"
Expand Down Expand Up @@ -900,8 +902,12 @@ static std::string generate_func_sig(
// Note that even though the LLVM argument is called ByVal
// this really means that the thing we're passing is pointing to
// the thing we want to pass by value
#ifndef _CPU_AARCH64_
// the aarch64 backend seems to interpret ByVal as
// implicitly passed on stack.
if (byRef)
paramattrs[i + sret].addAttribute(Attribute::ByVal);
#endif
if (inReg)
paramattrs[i + sret].addAttribute(Attribute::InReg);
if (av != Attribute::None)
Expand Down
8 changes: 4 additions & 4 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -923,15 +923,15 @@ JL_DLLEXPORT Type *julia_type_to_llvm(jl_value_t *jt, bool *isboxed)
if (jl_is_floattype(jt)) {
#ifndef DISABLE_FLOAT16
if (nb == 2)
return Type::getHalfTy(jl_LLVMContext);
return T_float16;
else
#endif
if (nb == 4)
return Type::getFloatTy(jl_LLVMContext);
return T_float32;
else if (nb == 8)
return Type::getDoubleTy(jl_LLVMContext);
return T_float64;
else if (nb == 16)
return Type::getFP128Ty(jl_LLVMContext);
return T_float128;
}
return Type::getIntNTy(jl_LLVMContext, nb*8);
}
Expand Down
4 changes: 4 additions & 0 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,10 @@ static IntegerType *T_uint64;
static IntegerType *T_char;
static IntegerType *T_size;

static Type *T_float16;
static Type *T_float32;
static Type *T_float64;
static Type *T_float128;

static Type *T_pint8;
static Type *T_pint16;
Expand Down Expand Up @@ -5512,10 +5514,12 @@ static void init_julia_llvm_env(Module *m)
else
T_size = T_uint32;
T_psize = PointerType::get(T_size, 0);
T_float16 = Type::getHalfTy(getGlobalContext());
T_float32 = Type::getFloatTy(getGlobalContext());
T_pfloat32 = PointerType::get(T_float32, 0);
T_float64 = Type::getDoubleTy(getGlobalContext());
T_pfloat64 = PointerType::get(T_float64, 0);
T_float128 = Type::getFP128Ty(getGlobalContext());
T_void = Type::getVoidTy(jl_LLVMContext);
T_pvoidfunc = FunctionType::get(T_void, /*isVarArg*/false)->getPointerTo();

Expand Down
1 change: 1 addition & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,7 @@ void jl_get_builtin_hooks(void)
jl_uint32_type = (jl_datatype_t*)core("UInt32");
jl_uint64_type = (jl_datatype_t*)core("UInt64");

jl_float16_type = (jl_datatype_t*)core("Float16");
jl_float32_type = (jl_datatype_t*)core("Float32");
jl_float64_type = (jl_datatype_t*)core("Float64");
jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat");
Expand Down
12 changes: 6 additions & 6 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,15 @@ static Type *FTnbits(size_t nb)
{
#ifndef DISABLE_FLOAT16
if (nb == 16)
return Type::getHalfTy(jl_LLVMContext);
return T_float16;
else
#endif
if (nb == 32)
return Type::getFloatTy(jl_LLVMContext);
return T_float32;
else if (nb == 64)
return Type::getDoubleTy(jl_LLVMContext);
return T_float64;
else if (nb == 128)
return Type::getFP128Ty(jl_LLVMContext);
return T_float128;
else
jl_error("Unsupported Float Size");
}
Expand Down Expand Up @@ -107,7 +107,7 @@ static jl_value_t *JL_JLUINTT(Type *t)
assert(!t->isIntegerTy());
if (t == T_float32) return (jl_value_t*)jl_uint32_type;
if (t == T_float64) return (jl_value_t*)jl_uint64_type;
if (t == Type::getHalfTy(jl_LLVMContext)) return (jl_value_t*)jl_uint16_type;
if (t == T_float16) return (jl_value_t*)jl_uint16_type;
assert(t == T_void);
return jl_bottom_type;
}
Expand All @@ -116,7 +116,7 @@ static jl_value_t *JL_JLSINTT(Type *t)
assert(!t->isIntegerTy());
if (t == T_float32) return (jl_value_t*)jl_int32_type;
if (t == T_float64) return (jl_value_t*)jl_int64_type;
if (t == Type::getHalfTy(jl_LLVMContext)) return (jl_value_t*)jl_int16_type;
if (t == T_float16) return (jl_value_t*)jl_int16_type;
assert(t == T_void);
return jl_bottom_type;
}
Expand Down
Loading

0 comments on commit b72c604

Please sign in to comment.