diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
new file mode 100644
index 0000000000000..07b60314a1a89
--- /dev/null
+++ b/src/abi_aarch64.cpp
@@ -0,0 +1,292 @@
+// This file is a part of Julia. License is MIT: http://julialang.org/license
+
+//===----------------------------------------------------------------------===//
+//
+// The ABI implementation used for AArch64 targets.
+//
+//===----------------------------------------------------------------------===//
+//
+// The Procedure Call Standard can be found here:
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf
+//
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef bool AbiState;
+static const AbiState default_abi_state = 0;
+
+// Map a Julia floating-point bitstype to the LLVM floating-point type of the
+// same size (2, 4, 8 or 16 bytes). Returns NULL if `dt` is mutable, has
+// fields, has any other size, or is not a float type per `jl_is_floattype`.
+static Type *get_llvm_fptype(jl_datatype_t *dt)
+{
+    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
+    // Reject mutable types and types that have fields. The field count has to
+    // be tested with `!= 0`: a `>= 0` test is true for every datatype and
+    // would make this function always return NULL.
+    if (dt->mutabl || jl_datatype_nfields(dt) != 0)
+        return NULL;
+    Type *lltype;
+    // Check size first since it's cheaper.
+    switch (dt->size) {
+    case 2:
+        lltype = T_float16;
+        break;
+    case 4:
+        lltype = T_float32;
+        break;
+    case 8:
+        lltype = T_float64;
+        break;
+    case 16:
+        lltype = T_float128;
+        break;
+    default:
+        return NULL;
+    }
+    return jl_is_floattype(dt) ? lltype : NULL;
+}
+
+// Whether a type is a homogeneous floating-point aggregate (HFA) or a
+// homogeneous short-vector aggregate (HVA). Returns the number of members,
+// or 0 if it is neither. We only handle HFA of HP, SP and DP here since these
+// are the only ones we have (no QP).
+static size_t isHFAorHVA(jl_datatype_t *dt)
+{
+    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
+
+    // An Homogeneous Floating-point Aggregate (HFA) is an Homogeneous Aggregate
+    // with a Fundamental Data Type that is a Floating-Point type and at most
+    // four uniquely addressable members.
+    // An Homogeneous Short-Vector Aggregate (HVA) is an Homogeneous Aggregate
+    // with a Fundamental Data Type that is a Short-Vector type and at most four
+    // uniquely addressable members.
+    size_t members = jl_datatype_nfields(dt);
+    if (members < 1 || members > 4)
+        return 0;
+    // There's at least one member
+    jl_value_t *ftype = jl_field_type(dt, 0);
+    if (!get_llvm_fptype((jl_datatype_t*)ftype))
+        return 0;
+    for (size_t i = 1;i < members;i++) {
+        if (ftype != jl_field_type(dt, i)) {
+            return 0;
+        }
+    }
+    return members;
+}
+
+// Decide whether an argument of type `ty` is passed by reference, following
+// rules B.2-B.4 quoted below. `*byRef` is left untouched for HFA/HVA types
+// and is otherwise set to whether the type is larger than 16 bytes.
+void needPassByRef(AbiState*, jl_value_t *ty, bool *byRef, bool*)
+{
+    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
+    jl_datatype_t *dt = (jl_datatype_t*)ty;
+    // B.2
+    // If the argument type is an HFA or an HVA, then the argument is used
+    // unmodified.
+    if (isHFAorHVA(dt))
+        return;
+    // B.3
+    // If the argument type is a Composite Type that is larger than 16 bytes,
+    // then the argument is copied to memory allocated by the caller and the
+    // argument is replaced by a pointer to the copy.
+    // We only check for the total size and not whether it is a composite type
+    // since there's no corresponding C type and we just treat such large
+    // bitstype as a composite type of the right size.
+    *byRef = dt->size > 16;
+    // B.4
+    // If the argument type is a Composite Type then the size of the argument
+    // is rounded up to the nearest multiple of 8 bytes.
+}
+
+// Always returns false: this ABI implementation never asks for a private copy.
+bool need_private_copy(jl_value_t*, bool)
+{
+    return false;
+}
+
+// Determine which kind of register the argument will be passed in and
+// if the argument has to be passed on stack (including by reference).
+//
+// If the argument should be passed in SIMD and floating-point registers,
+// we may need to rewrite the argument types to [n x ftype].
+// If the argument should be passed in general purpose registers, we may need
+// to rewrite the argument types to [n x i64].
+//
+// If the argument has to be passed on stack, we need to use sret.
+//
+// All the out parameters must be initialized to `false` by the caller.
+static void classify_arg(jl_value_t *ty, bool *fpreg, bool *onstack,
+                         bool *need_rewrite)
+{
+    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
+    jl_datatype_t *dt = (jl_datatype_t*)ty;
+
+    // Based on section 5.4 C of the Procedure Call Standard
+    // C.1
+    // If the argument is a Half-, Single-, Double- or Quad- precision
+    // Floating-point or Short Vector Type and the NSRN is less than 8, then
+    // the argument is allocated to the least significant bits of register
+    // v[NSRN]. The NSRN is incremented by one. The argument has now been
+    // allocated.
+    // Note that this is missing QP float as well as short vector types since we
+    // don't really have those types.
+    if (get_llvm_fptype(dt)) {
+        *fpreg = true;
+        return;
+    }
+
+    // C.2
+    // If the argument is an HFA or an HVA and there are sufficient
+    // unallocated SIMD and Floating-point registers (NSRN + number of
+    // members <= 8), then the argument is allocated to SIMD and
+    // Floating-point Registers (with one register per member of the HFA
+    // or HVA). The NSRN is incremented by the number of registers used.
+    // The argument has now been allocated.
+    if (isHFAorHVA(dt)) { // HFA and HVA have <= 4 members
+        *fpreg = true;
+        *need_rewrite = true;
+        return;
+    }
+
+    // Check if the argument needs to be passed by reference. This should be
+    // done before starting step C but we do this here to avoid checking for
+    // HFA and HVA twice. We don't check whether it is a composite type.
+    // See `needPassByRef` above.
+    if (dt->size > 16) {
+        *onstack = true;
+        return;
+    }
+
+    // C.3
+    // If the argument is an HFA or an HVA then the NSRN is set to 8 and the
+    // size of the argument is rounded up to the nearest multiple of 8 bytes.
+    // C.4
+    // If the argument is an HFA, an HVA, a Quad-precision Floating-point or
+    // Short Vector Type then the NSAA is rounded up to the larger of 8 or
+    // the Natural Alignment of the argument’s type.
+    // C.5
+    // If the argument is a Half- or Single- precision Floating Point type,
+    // then the size of the argument is set to 8 bytes. The effect is as if
+    // the argument had been copied to the least significant bits of a 64-bit
+    // register and the remaining bits filled with unspecified values.
+    // C.6
+    // If the argument is an HFA, an HVA, a Half-, Single-, Double- or
+    // Quad- precision Floating-point or Short Vector Type, then the argument
+    // is copied to memory at the adjusted NSAA. The NSAA is incremented
+    // by the size of the argument. The argument has now been allocated.
+    //
+    // C.7
+    // If the argument is an Integral or Pointer Type, the size of the
+    // argument is less than or equal to 8 bytes and the NGRN is less than 8,
+    // the argument is copied to the least significant bits in x[NGRN].
+    // The NGRN is incremented by one. The argument has now been allocated.
+    // Here we treat any bitstype of the right size as an integer or pointer.
+    // This is needed for types like Cstring which should be treated as
+    // pointers. We don't need to worry about floating points here since they
+    // are handled above.
+    if (jl_is_immutable(dt) && jl_datatype_nfields(dt) == 0 &&
+        (dt->size == 1 || dt->size == 2 || dt->size == 4 ||
+         dt->size == 8 || dt->size == 16))
+        return;
+
+    // C.8
+    // If the argument has an alignment of 16 then the NGRN is rounded up to
+    // the next even number.
+    // C.9
+    // If the argument is an Integral Type, the size of the argument is equal
+    // to 16 and the NGRN is less than 7, the argument is copied to x[NGRN]
+    // and x[NGRN+1]. x[NGRN] shall contain the lower addressed double-word
+    // of the memory representation of the argument. The NGRN is incremented
+    // by two. The argument has now been allocated.
+    //
+    // C.10
+    // If the argument is a Composite Type and the size in double-words of
+    // the argument is not more than 8 minus NGRN, then the argument is
+    // copied into consecutive general-purpose registers, starting at x[NGRN].
+    // The argument is passed as though it had been loaded into the registers
+    // from a double-word-aligned address with an appropriate sequence of LDR
+    // instructions loading consecutive registers from memory (the contents of
+    // any unused parts of the registers are unspecified by this standard).
+    // The NGRN is incremented by the number of registers used. The argument
+    // has now been allocated.
+    // We don't check for composite types here since the ones that have
+    // corresponding C types are already handled and we just treat the ones
+    // with weird size as a black box composite type.
+    // The type fits in 2 x 8 bytes here since anything larger than 16 bytes
+    // has already been marked `onstack` above.
+    *need_rewrite = true;
+
+    // C.11
+    // The NGRN is set to 8.
+    // C.12
+    // The NSAA is rounded up to the larger of 8 or the Natural Alignment
+    // of the argument’s type.
+    // C.13
+    // If the argument is a composite type then the argument is copied to
+    // memory at the adjusted NSAA. The NSAA is incremented by the size of
+    // the argument. The argument has now been allocated.
+    //
+    // C.14
+    // If the size of the argument is less than 8 bytes then the size of the
+    // argument is set to 8 bytes. The effect is as if the argument was
+    // copied to the least significant bits of a 64-bit register and the
+    // remaining bits filled with unspecified values.
+    // C.15
+    // The argument is copied to memory at the adjusted NSAA. The NSAA is
+    // incremented by the size of the argument. The argument has now been
+    // allocated.
+    //
+}
+
+bool use_sret(AbiState*, jl_value_t *ty)
+{
+    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
+    // Returns whether `classify_arg` reports `onstack` for `ty`.
+    // Section 5.5: if the type, T, of the result of a function is such that
+    //
+    //     void func(T arg)
+    //
+    // would require that arg be passed as a value in a register (or set of
+    // registers) according to the rules in section 5.4 Parameter Passing,
+    // then the result is returned in the same registers as would be used for
+    // such an argument.
+    bool fpreg = false;
+    bool onstack = false;
+    bool need_rewrite = false;
+    classify_arg(ty, &fpreg, &onstack, &need_rewrite);
+    return onstack;
+}
+
+Type *preferred_llvm_type(jl_value_t *ty, bool)
+{
+    if (!jl_is_datatype(ty) || jl_is_abstracttype(ty))
+        return NULL;
+    jl_datatype_t *dt = (jl_datatype_t*)ty;
+    if (Type *fptype = get_llvm_fptype(dt))
+        return fptype; // plain floating-point bitstype
+    bool fpreg = false;
+    bool onstack = false;
+    bool need_rewrite = false;
+    classify_arg(ty, &fpreg, &onstack, &need_rewrite);
+    if (!need_rewrite)
+        return NULL; // classify_arg did not request a rewrite
+    if (fpreg) {
+        // Rewrite to [n x fptype] where n is the number of fields
+        // This only happens when isHFAorHVA(dt) is non-zero
+        size_t members = jl_datatype_nfields(dt);
+        assert(members > 0 && members <= 4);
+        jl_datatype_t *eltype = (jl_datatype_t*)jl_field_type(dt, 0);
+        return ArrayType::get(get_llvm_fptype(eltype), members);
+    }
+    else {
+        // Rewrite to [n x Int64] where n is the **size in dword**
+        assert(dt->size <= 16); // Should be passed by reference otherwise
+        return ArrayType::get(T_int64, (dt->size + 7) >> 3);
+    }
+}
+
+}
diff --git a/src/ccall.cpp b/src/ccall.cpp
index dc10d46cdc802..9803c03799780 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -141,6 +141,8 @@ static Value *runtime_sym_lookup(PointerType *funcptype, const char *f_lib, cons
 # else
 # include "abi_x86.cpp"
 # endif
+#elif defined _CPU_AARCH64_
+# include "abi_aarch64.cpp"
 #else
 # warning "ccall is defaulting to llvm ABI, since no platform ABI has been defined for this CPU/OS combination"
 # include "abi_llvm.cpp"
@@ -900,8 +902,12 @@ static std::string generate_func_sig( // Note that even though the LLVM argument is called ByVal // this really means that the thing we're passing is pointing to // the thing we want to pass by value +#ifndef _CPU_AARCH64_ + // the aarch64 backend seems to interpret ByVal as + // implicitly passed on stack. if (byRef) paramattrs[i + sret].addAttribute(Attribute::ByVal); +#endif if (inReg) paramattrs[i + sret].addAttribute(Attribute::InReg); if (av != Attribute::None) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 7dca7cc83d5d0..7a5b0c0b598ac 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -923,15 +923,15 @@ JL_DLLEXPORT Type *julia_type_to_llvm(jl_value_t *jt, bool *isboxed) if (jl_is_floattype(jt)) { #ifndef DISABLE_FLOAT16 if (nb == 2) - return Type::getHalfTy(jl_LLVMContext); + return T_float16; else #endif if (nb == 4) - return Type::getFloatTy(jl_LLVMContext); + return T_float32; else if (nb == 8) - return Type::getDoubleTy(jl_LLVMContext); + return T_float64; else if (nb == 16) - return Type::getFP128Ty(jl_LLVMContext); + return T_float128; } return Type::getIntNTy(jl_LLVMContext, nb*8); } diff --git a/src/codegen.cpp b/src/codegen.cpp index 9d0f8f707bdf8..fdaae6403bb5c 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -246,8 +246,10 @@ static IntegerType *T_uint64; static IntegerType *T_char; static IntegerType *T_size; +static Type *T_float16; static Type *T_float32; static Type *T_float64; +static Type *T_float128; static Type *T_pint8; static Type *T_pint16; @@ -5512,10 +5514,12 @@ static void init_julia_llvm_env(Module *m) else T_size = T_uint32; T_psize = PointerType::get(T_size, 0); + T_float16 = Type::getHalfTy(getGlobalContext()); T_float32 = Type::getFloatTy(getGlobalContext()); T_pfloat32 = PointerType::get(T_float32, 0); T_float64 = Type::getDoubleTy(getGlobalContext()); T_pfloat64 = PointerType::get(T_float64, 0); + T_float128 = Type::getFP128Ty(getGlobalContext()); T_void = Type::getVoidTy(jl_LLVMContext); 
T_pvoidfunc = FunctionType::get(T_void, /*isVarArg*/false)->getPointerTo(); diff --git a/src/init.c b/src/init.c index a1a3601969a37..edf4f4a70e200 100644 --- a/src/init.c +++ b/src/init.c @@ -801,6 +801,7 @@ void jl_get_builtin_hooks(void) jl_uint32_type = (jl_datatype_t*)core("UInt32"); jl_uint64_type = (jl_datatype_t*)core("UInt64"); + jl_float16_type = (jl_datatype_t*)core("Float16"); jl_float32_type = (jl_datatype_t*)core("Float32"); jl_float64_type = (jl_datatype_t*)core("Float64"); jl_floatingpoint_type = (jl_datatype_t*)core("AbstractFloat"); diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index c2302016f96ca..4433c0c2a3608 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -61,15 +61,15 @@ static Type *FTnbits(size_t nb) { #ifndef DISABLE_FLOAT16 if (nb == 16) - return Type::getHalfTy(jl_LLVMContext); + return T_float16; else #endif if (nb == 32) - return Type::getFloatTy(jl_LLVMContext); + return T_float32; else if (nb == 64) - return Type::getDoubleTy(jl_LLVMContext); + return T_float64; else if (nb == 128) - return Type::getFP128Ty(jl_LLVMContext); + return T_float128; else jl_error("Unsupported Float Size"); } @@ -107,7 +107,7 @@ static jl_value_t *JL_JLUINTT(Type *t) assert(!t->isIntegerTy()); if (t == T_float32) return (jl_value_t*)jl_uint32_type; if (t == T_float64) return (jl_value_t*)jl_uint64_type; - if (t == Type::getHalfTy(jl_LLVMContext)) return (jl_value_t*)jl_uint16_type; + if (t == T_float16) return (jl_value_t*)jl_uint16_type; assert(t == T_void); return jl_bottom_type; } @@ -116,7 +116,7 @@ static jl_value_t *JL_JLSINTT(Type *t) assert(!t->isIntegerTy()); if (t == T_float32) return (jl_value_t*)jl_int32_type; if (t == T_float64) return (jl_value_t*)jl_int64_type; - if (t == Type::getHalfTy(jl_LLVMContext)) return (jl_value_t*)jl_int16_type; + if (t == T_float16) return (jl_value_t*)jl_int16_type; assert(t == T_void); return jl_bottom_type; } diff --git a/src/jltypes.c b/src/jltypes.c index db316bbd50dff..ac717be4fbc70 100644 
--- a/src/jltypes.c +++ b/src/jltypes.c @@ -50,6 +50,7 @@ jl_datatype_t *jl_int32_type; jl_datatype_t *jl_uint32_type; jl_datatype_t *jl_int64_type; jl_datatype_t *jl_uint64_type; +jl_datatype_t *jl_float16_type; jl_datatype_t *jl_float32_type; jl_datatype_t *jl_float64_type; jl_datatype_t *jl_floatingpoint_type; diff --git a/src/julia.h b/src/julia.h index b93c268f284b9..120783c33e507 100644 --- a/src/julia.h +++ b/src/julia.h @@ -25,6 +25,9 @@ extern "C" { # define jl_jmp_buf sigjmp_buf # if defined(_CPU_ARM_) # define MAX_ALIGN 8 +# elif defined(_CPU_AARCH64_) +// int128 is 16 bytes aligned on aarch64 +# define MAX_ALIGN 16 # else # define MAX_ALIGN sizeof(void*) # endif @@ -508,6 +511,7 @@ extern JL_DLLEXPORT jl_datatype_t *jl_int32_type; extern JL_DLLEXPORT jl_datatype_t *jl_uint32_type; extern JL_DLLEXPORT jl_datatype_t *jl_int64_type; extern JL_DLLEXPORT jl_datatype_t *jl_uint64_type; +extern JL_DLLEXPORT jl_datatype_t *jl_float16_type; extern JL_DLLEXPORT jl_datatype_t *jl_float32_type; extern JL_DLLEXPORT jl_datatype_t *jl_float64_type; extern JL_DLLEXPORT jl_datatype_t *jl_floatingpoint_type;