diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp new file mode 100644 index 0000000000000..4771089d1e7b5 --- /dev/null +++ b/src/abi_arm.cpp @@ -0,0 +1,299 @@ +// This file is a part of Julia. License is MIT: http://julialang.org/license + +//===----------------------------------------------------------------------===// +// +// The ABI implementation used for ARM targets. +// +//===----------------------------------------------------------------------===// +// +// The Procedure Call Standard can be found here: +// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf +// +//===----------------------------------------------------------------------===// + +#ifndef __ARM_EABI__ +# error "the Julia ARM ABI implementation only supports EABI" +#endif + +#ifndef __ARM_PCS_VFP +# error "the Julia ARM ABI implementation requires VFP support" +#endif + +namespace { + +typedef bool AbiState; +AbiState default_abi_state = 0; + +void needPassByRef(AbiState *state,jl_value_t *ty, bool *byRef, bool *inReg) +{ + return; +} + +bool need_private_copy(jl_value_t *ty, bool byRef) +{ + return false; +} + +static Type *get_llvm_fptype(jl_datatype_t *dt) +{ + // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt) + if (dt->mutabl || jl_datatype_nfields(dt) != 0) + return NULL; + Type *lltype; + // Check size first since it's cheaper. + switch (dt->size) { + case 2: + lltype = T_float16; + break; + case 4: + lltype = T_float32; + break; + case 8: + lltype = T_float64; + break; + default: + return NULL; + } + return jl_is_floattype((jl_value_t*)dt) ? lltype : NULL; +} + +static size_t isLegalHA(jl_datatype_t *dt, Type *&base); + +// Check whether a type contained by a candidate homogeneous aggregate is valid +// fundamental type. +// +// Returns the corresponding LLVM type. +static Type *isLegalHAType(jl_datatype_t *dt) +{ + // single- or double-precision floating-point type + if (Type* fp = get_llvm_fptype(dt)) + return fp; + + // NOT SUPPORTED: 64- or 128-bit containerized vectors + + return NULL; +} + +// Check whether a type is a legal homogeneous aggregate. +// Returns the number of fundamental members. +// +// Legality of the HA is determined by a nonzero return value. +// In case of a non-legal HA, the value of 'base' is undefined. +static size_t isLegalHA(jl_datatype_t *dt, Type *&base) { + // Homogeneous aggregates are only used for VFP registers, + // so use that definition of legality (section 6.1.2.1) + + if (jl_is_structtype(dt)) { + // Fast path checks before descending the type hierarchy + // (4 x 128b vector == 64B max size) + if (dt->size > 64 || !dt->pointerfree || dt->haspadding) + return 0; + + base = NULL; + size_t total_members = 0; + + size_t parent_members = jl_datatype_nfields(dt); + for (size_t i = 0; i < parent_members; ++i) { + jl_datatype_t *fdt = (jl_datatype_t*)jl_field_type(dt,i); + + Type *T = isLegalHAType(fdt); + if (T) + total_members++; + else if (size_t field_members = isLegalHA(fdt, T)) + // recursive application (expanding nested composite types) + total_members += field_members; + else + return 0; + + if (!base) + base = T; + else if (base != T) + return 0; + } + + // ... with one to four Elements. + if (total_members < 1 || total_members > 4) + return 0; + + return total_members; + } + + return 0; +} + +// Determine if an argument can be passed through a coprocessor register. +// +// All the out parameters should be default to `false`. +static void classify_cprc(jl_datatype_t *dt, bool *vfp) +{ + // Based on section 6.1 of the Procedure Call Standard + + // VFP: 6.1.2.1 + // - A half-precision floating-point type. + // - A single-precision floating-point type. + // - A double-precision floating-point type. + if (get_llvm_fptype(dt)) { + *vfp = true; + return; + } + + // NOT SUPPORTED: A 64-bit or 128-bit containerized vector type. + + // - A Homogeneous Aggregate + Type *base = NULL; + if (isLegalHA(dt, base)) { + *vfp = true; + return; + } +} + +static void classify_return_arg(jl_value_t *ty, bool *reg, + bool *onstack, bool *need_rewrite) +{ + // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty) + jl_datatype_t *dt = (jl_datatype_t*)ty; + + // Based on section 5.4 of the Procedure Call Standard + + // VFP standard variant: see 6.1.2.2 + // Any result whose type would satisfy the conditions for a VFP CPRC is + // returned in the appropriate number of consecutive VFP registers + // starting with the lowest numbered register (s0, d0, q0). + classify_cprc(dt, reg); + if (*reg) + return; + + // - A Half-precision Floating Point Type is returned in the least + // significant 16 bits of r0. + if (dt == jl_float16_type) { + *reg = true; + return; + } + + // - A Fundamental Data Type that is smaller than 4 bytes is zero- or + // sign-extended to a word and returned in r0. + // - A double-word sized Fundamental Data Type (e.g., long long, double and + // 64-bit containerized vectors) is returned in r0 and r1. + // - A word-sized Fundamental Data Type (eg., int, float) is returned in r0. + // NOTE: assuming "fundamental type" == jl_is_bitstype, might need exact def + if (jl_is_bitstype(dt) && dt->size <= 8) { + *reg = true; + return; + } + + // If we ever support containerized vectors on an ARMv7 without VFP, + // these can be returned in r0-r3 as well. + + // NOTE: we don't check for jl_is_structtype below, because at this point + // everything will be rewritten to look like a composite aggregate + *need_rewrite = true; + + // - A Composite Type not larger than 4 bytes is returned in r0. The format + // is as if the result had been stored in memory at a word-aligned address + // and then loaded into r0 with an LDR instruction. Any bits in r0 that + // lie outside the bounds of the result have unspecified values. + // - A Composite Type larger than 4 bytes, or whose size cannot be + // determined statically by both caller and callee, is stored in memory at + // an address passed as an extra argument when the function was called + // (ยง5.5, rule A.4). The memory to be used for the result may be modified + // at any point during the function call. + if (dt->size <= 4) + *reg = true; + else + *onstack = true; +} + +bool use_sret(AbiState *state, jl_value_t *ty) +{ + // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty) + + bool reg = false; + bool onstack = false; + bool need_rewrite = false; + classify_return_arg(ty, ®, &onstack, &need_rewrite); + + return onstack; +} + +// Determine which kind of register the argument will be passed in and +// if the argument has to be passed on stack (including by reference). +// +// If the argument should be passed in SIMD and floating-point registers, +// we may need to rewrite the argument types to [n x ftype]. +// If the argument should be passed in general purpose registers, we may need +// to rewrite the argument types to [n x i64]. +// +// If the argument has to be passed on stack, we need to use sret. +// +// All the out parameters should be default to `false`. +static void classify_arg(jl_value_t *ty, bool *reg, + bool *onstack, bool *need_rewrite) +{ + // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty) + jl_datatype_t *dt = (jl_datatype_t*)ty; + + // Based on section 5.5 of the Procedure Call Standard + + // C.1.cp + // If the argument is a CPRC and there are sufficient unallocated + // co-processor registers of the appropriate class, the argument is + // allocated to co-processor registers. + classify_cprc(dt, reg); + if (*reg) + return; + + // Handle fundamental types + if (jl_is_bitstype(dt) && dt->size <= 8) { + *reg = true; + return; + } + + *need_rewrite = true; +} + +Type *preferred_llvm_type(jl_value_t *ty, bool isret) +{ + if (!jl_is_datatype(ty) || jl_is_abstracttype(ty)) + return NULL; + jl_datatype_t *dt = (jl_datatype_t*)ty; + + if (Type *fptype = get_llvm_fptype(dt)) + return fptype; + + bool reg = false; + bool onstack = false; + bool need_rewrite = false; + if (isret) + classify_return_arg(ty, ®, &onstack, &need_rewrite); + else + classify_arg(ty, ®, &onstack, &need_rewrite); + + if (!need_rewrite) + return NULL; + + // Based on section 4 of the Procedure Call Standard + + // If some type is illegal and needs to be rewritten, + // represent it as an aggregate composite type. + + // 4.3.1: aggregates + // - The alignment of an aggregate shall be the alignment of its + // most-aligned component. + // - The size of an aggregate shall be the smallest multiple of its + // alignment that is sufficient to hold all of its members when they are + // laid out according to these rules. + // 5.5 B.5 + // For a Composite Type, the alignment of the copy will have 4-byte + // alignment if its natural alignment is <= 4 and 8-byte alignment if + // its natural alignment is >= 8 + size_t align = dt->alignment; + if (align < 4) + align = 4; + if (align > 8) + align = 8; + + Type* T = Type::getIntNTy(getGlobalContext(), align*8); + return ArrayType::get(T, (dt->size + align - 1) / align); +} + +} diff --git a/src/ccall.cpp b/src/ccall.cpp index 9803c03799780..40da4e97c99e8 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -141,6 +141,8 @@ static Value *runtime_sym_lookup(PointerType *funcptype, const char *f_lib, cons # else # include "abi_x86.cpp" # endif +#elif defined _CPU_ARM_ +# include "abi_arm.cpp" #elif defined _CPU_AARCH64_ # include "abi_aarch64.cpp" #else @@ -1231,13 +1233,12 @@ static jl_cgval_t emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx) Value *mem = emit_static_alloca(lrt, ctx); builder.CreateStore(sret_val.V, mem); result = mem; - argvals[0] = result; } else { // XXX: result needs a GC root here if result->getType() == T_pjlvalue result = sret_val.V; - argvals[0] = builder.CreateBitCast(result, fargt_sig.at(0)); } + argvals[0] = builder.CreateBitCast(result, fargt_sig.at(0)); sretboxed = sret_val.isboxed; } diff --git a/src/codegen.cpp b/src/codegen.cpp index 604b5c64a138f..f54297574b865 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -4174,7 +4174,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t size_t FParamIndex = 0; if (jlfunc_sret) { if (sret) - result = sretPtr; + result = builder.CreateBitCast(sretPtr, theFptr->getFunctionType()->getParamType(0)); else result = builder.CreateAlloca(theFptr->getFunctionType()->getParamType(0)->getContainedType(0)); args.push_back(result); @@ -6179,9 +6179,10 @@ static inline SmallVector getTargetFeatures() { std::string cpu = strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : sys::getHostCPUName(); if (cpu.empty() || cpu == "generic") { jl_printf(JL_STDERR, "WARNING: unable to determine host cpu name.\n"); -#ifdef _CPU_ARM_ +#if defined(_CPU_ARM_) && defined(__ARM_PCS_VFP) // Check if this is required when you have read the features directly from the processor - // the processors that don't have VFP are old and (hopefully) rare. this affects the platform calling convention. + // This affects the platform calling convention. + // TODO: enable vfp3 for ARMv7+ (but adapt the ABI) HostFeatures["vfp2"] = true; #endif }