Skip to content

Commit

Permalink
Implement ARM ABI.
Browse files Browse the repository at this point in the history
(cherry picked from commit 0eae5e5)
ref #14194
  • Loading branch information
maleadt authored and tkelman committed Mar 27, 2016
1 parent 03c072d commit 0b2c097
Show file tree
Hide file tree
Showing 3 changed files with 295 additions and 6 deletions.
285 changes: 285 additions & 0 deletions src/abi_arm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,285 @@
// This file is a part of Julia. License is MIT: http://julialang.org/license

//===----------------------------------------------------------------------===//
//
// The ABI implementation used for ARM targets.
//
//===----------------------------------------------------------------------===//
//
// The Procedure Call Standard can be found here:
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf
//
//===----------------------------------------------------------------------===//

#ifndef __ARM_EABI__
# error "the Julia ARM ABI implementation only supports EABI"
#endif

#ifndef __ARM_PCS_VFP
# error "the Julia ARM ABI implementation requires VFP support"
#endif

namespace {

// Placeholder for per-signature ABI classification state. None of the
// functions in this file read or write it, so a plain flag type suffices.
typedef bool AbiState;
// Initial value handed out for a fresh AbiState.
AbiState default_abi_state = false;

// Hook for deciding whether an argument of type `ty` has to be passed by
// reference and/or in a register.
//
// This implementation performs no work: `*byRef` and `*inReg` are never
// written, so they keep whatever values the caller stored in them, and
// `state` is not consulted.
void needPassByRef(AbiState *state,jl_value_t *ty, bool *byRef, bool *inReg)
{
    (void)state;
    (void)ty;
    (void)byRef;
    (void)inReg;
}

// Hook reporting whether an argument needs a private copy.
//
// Always answers `false`, independent of `ty` and `byRef`.
bool need_private_copy(jl_value_t *ty, bool byRef)
{
    (void)ty;
    (void)byRef;
    const bool copy_needed = false;
    return copy_needed;
}

// Map a Julia datatype onto the LLVM floating-point type of the same width.
//
// Returns T_float16 / T_float32 / T_float64 when `dt` is immutable, has no
// fields, is exactly 2 / 4 / 8 bytes wide and satisfies jl_is_floattype.
// Returns NULL in every other case.
static Type *get_llvm_fptype(jl_datatype_t *dt)
{
    // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
    if (dt->mutabl)
        return NULL;
    if (jl_datatype_nfields(dt) != 0)
        return NULL;

    // Select the candidate by size before running the float-type check,
    // because the size comparison is the cheaper of the two.
    Type *candidate = NULL;
    if (dt->size == 2)
        candidate = T_float16;
    else if (dt->size == 4)
        candidate = T_float32;
    else if (dt->size == 8)
        candidate = T_float64;
    else
        return NULL;

    if (!jl_is_floattype((jl_value_t*)dt))
        return NULL;
    return candidate;
}

// Check whether a type is a legal *base* (leaf) type of a homogeneous
// aggregate. Returns the corresponding LLVM type, or NULL if `dt` is not a
// supported base type.
//
// Nested composites are deliberately not handled here: isLegalHA expands them
// itself so that it can count their elements.
static Type *isLegalHAType(jl_datatype_t *dt)
{
    // half-, single- or double-precision floating-point type
    if (Type *fp = get_llvm_fptype(dt))
        return fp;

    // NOT SUPPORTED: 64- or 128-bit containerized vectors

    return NULL;
}

// Check whether a type is a legal homogeneous aggregate.
//
// Returns the number of base-type elements the aggregate contains once nested
// composites have been expanded (1 to 4), or 0 if `dt` is not a legal
// homogeneous aggregate. On success `base` is set to the LLVM type shared by
// all elements; on failure its value is unspecified.
static size_t isLegalHA(jl_datatype_t *dt, Type *&base)
{
    // Homogeneous aggregates are only used for VFP registers,
    // so use that definition of legality (section 6.1.2.1)
    if (!jl_is_structtype(dt))
        return 0;

    base = NULL;
    size_t total_members = 0;
    const size_t nfields = jl_datatype_nfields(dt);
    for (size_t i = 0; i < nfields; ++i) {
        // A field type is not necessarily a concrete datatype; the helpers
        // below dereference it as one, so reject anything else up front.
        jl_value_t *fty = (jl_value_t*)jl_field_type(dt, i);
        if (!jl_is_datatype(fty) || jl_is_abstracttype(fty))
            return 0;
        jl_datatype_t *fdt = (jl_datatype_t*)fty;

        // NOTE(review): fields stored by reference (e.g. fields of a mutable
        // type) are presumably not inline elements -- confirm whether an
        // explicit pointer-field check is needed before recursing.
        Type *T = isLegalHAType(fdt);
        if (T)
            total_members += 1;
        else if (size_t nested = isLegalHA(fdt, T))
            // recursive application (composite types can contain other
            // composites); every nested element counts towards the limit
            total_members += nested;
        else
            return 0;

        // ... with one to four Elements. The limit applies to the expanded
        // element count, not to the number of direct fields.
        if (total_members > 4)
            return 0;

        // All elements must share one base type.
        if (!base)
            base = T;
        else if (base != T)
            return 0;
    }

    if (total_members < 1)
        return 0;
    return total_members;
}

// Decide whether a value of type `dt` is a co-processor register candidate
// (CPRC), i.e. can travel through VFP registers.
//
// `*vfp` is set to true for a candidate and left untouched otherwise, so the
// caller is expected to initialise it to `false`.
static void classify_cprc(jl_datatype_t *dt, bool *vfp)
{
    // Based on section 6.1 of the Procedure Call Standard
    //
    // VFP candidates (6.1.2.1):
    // - A half-, single- or double-precision floating-point type.
    // - A Homogeneous Aggregate.
    // NOT SUPPORTED: A 64-bit or 128-bit containerized vector type.
    //
    // The scalar check runs first; the aggregate check is only evaluated
    // when the scalar check fails.
    Type *ha_base = NULL;
    if (get_llvm_fptype(dt) || isLegalHA(dt, ha_base))
        *vfp = true;
}

// Classify how a return value of type `ty` is handed back to the caller.
//
// Exactly one of `*reg` / `*onstack` is set to true; `*need_rewrite` is set
// additionally when the value has to be presented as a composite aggregate.
// Flags are only ever set, never cleared, so all three out parameters should
// be `false` on entry.
static void classify_return_arg(jl_value_t *ty, bool *reg,
                                bool *onstack, bool *need_rewrite)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    // Based on section 5.4 of the Procedure Call Standard

    // VFP standard variant (6.1.2.2): a result that qualifies as a VFP CPRC
    // comes back in consecutive VFP registers, starting with the lowest
    // numbered one (s0, d0, q0).
    classify_cprc(dt, reg);
    if (*reg)
        return;

    // Values returned in core registers without any rewriting:
    // - A Half-precision Floating Point Type: least significant 16 bits of r0.
    // - A Fundamental Data Type smaller than 4 bytes: zero- or sign-extended
    //   to a word, in r0.
    // - A word-sized Fundamental Data Type (e.g. int, float): r0.
    // - A double-word sized Fundamental Data Type (e.g. long long, double and
    //   64-bit containerized vectors): r0 and r1.
    // NOTE: assuming "fundamental type" == jl_is_bitstype, might need exact def
    const bool core_register_value =
        dt == jl_float16_type || (jl_is_bitstype(dt) && dt->size <= 8);
    if (core_register_value) {
        *reg = true;
        return;
    }

    // If we ever support containerized vectors on an ARMv7 without VFP,
    // these can be returned in r0-r3 as well.

    // NOTE: there is no jl_is_structtype check from here on, because at this
    //       point everything will be rewritten to look like a composite
    //       aggregate.
    *need_rewrite = true;

    // - A Composite Type larger than 4 bytes, or whose size cannot be
    //   determined statically by both caller and callee, is stored in memory
    //   at an address passed as an extra argument when the function was
    //   called (§5.5, rule A.4). That memory may be modified at any point
    //   during the call.
    if (dt->size > 4) {
        *onstack = true;
        return;
    }

    // - A Composite Type not larger than 4 bytes is returned in r0, formatted
    //   as if it had been stored at a word-aligned address and loaded with an
    //   LDR instruction. Bits of r0 outside the bounds of the result have
    //   unspecified values.
    *reg = true;
}

// Report whether a return value of type `ty` has to be returned through
// memory, i.e. whether classify_return_arg flags it as `onstack`.
// `state` is not used.
bool use_sret(AbiState *state, jl_value_t *ty)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    (void)state;

    bool in_register = false;
    bool in_memory = false;
    bool rewrite = false;
    classify_return_arg(ty, &in_register, &in_memory, &rewrite);

    return in_memory;
}

// Classify how an argument of type `ty` is passed.
//
// `*reg` is set when the argument is a co-processor register candidate or a
// bits type of at most 8 bytes. For everything else `*need_rewrite` is set
// instead, which requests that the argument type be rewritten (see
// preferred_llvm_type). `*onstack` is never written by this function.
//
// Flags are only ever set, never cleared, so all out parameters should be
// `false` on entry.
static void classify_arg(jl_value_t *ty, bool *reg,
                         bool *onstack, bool *need_rewrite)
{
    // Assume jl_is_datatype(ty) && !jl_is_abstracttype(ty)
    (void)onstack;
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    // Based on section 5.5 of the Procedure Call Standard

    // C.1.cp
    //   If the argument is a CPRC and there are sufficient unallocated
    //   co-processor registers of the appropriate class, the argument is
    //   allocated to co-processor registers.
    classify_cprc(dt, reg);
    if (*reg)
        return;

    // Fundamental types go in registers unchanged; anything else is rewritten.
    const bool fundamental = jl_is_bitstype(dt) && dt->size <= 8;
    if (fundamental)
        *reg = true;
    else
        *need_rewrite = true;
}

// Pick the LLVM type that should represent `ty` in a C function signature.
//
// ty    - the Julia type of the argument or return value
// isret - true to classify `ty` as a return value, false as an argument
//
// Returns:
// - NULL if `ty` is not a concrete datatype, or if classification says that
//   no rewrite is needed;
// - the matching LLVM floating-point type for floating-point scalars;
// - otherwise an array of integers that covers the size of `ty`, with the
//   element width chosen from the (clamped) alignment of `ty`.
Type *preferred_llvm_type(jl_value_t *ty, bool isret)
{
    if (!jl_is_datatype(ty) || jl_is_abstracttype(ty))
        return NULL;
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    Type *fptype = get_llvm_fptype(dt);
    if (fptype)
        return fptype;

    bool in_register = false;
    bool in_memory = false;
    bool rewrite = false;
    if (!isret)
        classify_arg(ty, &in_register, &in_memory, &rewrite);
    else
        classify_return_arg(ty, &in_register, &in_memory, &rewrite);

    if (!rewrite)
        return NULL;

    // Based on section 4 of the Procedure Call Standard
    //
    // A type that is illegal and needs to be rewritten is represented as an
    // aggregate composite type.
    //
    // 4.3.1: aggregates
    // - The alignment of an aggregate shall be the alignment of its
    //   most-aligned component.
    // - The size of an aggregate shall be the smallest multiple of its
    //   alignment that is sufficient to hold all of its members when they
    //   are laid out according to these rules.
    // 5.5 B.5
    //   For a Composite Type, the alignment of the copy will have 4-byte
    //   alignment if its natural alignment is <= 4 and 8-byte alignment if
    //   its natural alignment is >= 8
    const size_t natural = dt->alignment;
    const size_t align = natural < 4 ? 4 : (natural > 8 ? 8 : natural);

    // One integer element per `align` bytes, rounding the element count up.
    Type *element = Type::getIntNTy(getGlobalContext(), align*8);
    const size_t count = (dt->size + align - 1) / align;
    return ArrayType::get(element, count);
}

}
9 changes: 6 additions & 3 deletions src/ccall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ static Value *runtime_sym_lookup(PointerType *funcptype, char *f_lib, char *f_na
# else
# include "abi_x86.cpp"
# endif
#elif defined _CPU_ARM_
# include "abi_arm.cpp"
#elif defined _CPU_AARCH64_
# include "abi_aarch64.cpp"
#else
Expand Down Expand Up @@ -1265,12 +1267,13 @@ static Value *emit_ccall(jl_value_t **args, size_t nargs, jl_codectx_t *ctx)
Value *mem = emit_static_alloca(lrt, ctx);
builder.CreateStore(result, mem);
result = mem;
argvals[0] = result;
}
else {
// XXX: result needs a GC root here if result->getType() == jl_pvalue_llvmt
argvals[0] = builder.CreateBitCast(result, fargt_sig[0]);
// XXX: result needs a GC root here if result->getType() == T_pjlvalue
result = sret_val.V;
}
argvals[0] = builder.CreateBitCast(result, fargt_sig.at(0));
sretboxed = sret_val.isboxed;
}

// save argument depth until after we're done emitting arguments
Expand Down
7 changes: 4 additions & 3 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3717,7 +3717,7 @@ static Function *gen_cfun_wrapper(jl_function_t *ff, jl_value_t *jlrettype, jl_t
size_t FParamIndex = 0;
if (jlfunc_sret) {
if (sret)
result = sretPtr;
result = builder.CreateBitCast(sretPtr, theFptr->getFunctionType()->getParamType(0));
else
result = builder.CreateAlloca(theFptr->getFunctionType()->getParamType(0)->getContainedType(0));
args.push_back(result);
Expand Down Expand Up @@ -5695,9 +5695,10 @@ static inline SmallVector<std::string,10> getTargetFeatures() {
std::string cpu = strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : sys::getHostCPUName();
if (cpu.empty() || cpu == "generic") {
jl_printf(JL_STDERR, "WARNING: unable to determine host cpu name.\n");
#ifdef _CPU_ARM_
#if defined(_CPU_ARM_) && defined(__ARM_PCS_VFP)
// Check if this is required when you have read the features directly from the processor
// the processors that don't have VFP are old and (hopefully) rare. this affects the platform calling convention.
// This affects the platform calling convention.
// TODO: enable vfp3 for ARMv7+ (but adapt the ABI)
HostFeatures["vfp2"] = true;
#endif
}
Expand Down

0 comments on commit 0b2c097

Please sign in to comment.