- Sponsor
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARM ABI #14194
ARM ABI #14194
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
// This file is a part of Julia. License is MIT: http://julialang.org/license | ||
|
||
//===----------------------------------------------------------------------===// | ||
// | ||
// The ABI implementation used for ARM targets. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// The Procedure Call Standard can be found here: | ||
// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042f/IHI0042F_aapcs.pdf | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef __ARM_EABI__ | ||
# error "the Julia ARM ABI implementation only supports EABI" | ||
#endif | ||
|
||
#ifndef __ARM_PCS_VFP | ||
# error "the Julia ARM ABI implementation requires VFP support" | ||
#endif | ||
|
||
namespace { | ||
|
||
// Per-call ABI classification state. The ARM implementation in this file
// never reads or updates it, so a plain flag is sufficient.
typedef bool AbiState;
AbiState default_abi_state = false;
|
||
// Intentionally a no-op: neither `*byRef` nor `*inReg` is written, so both
// keep whatever value the caller initialised them with. `state` and `ty`
// are not inspected.
void needPassByRef(AbiState *state, jl_value_t *ty, bool *byRef, bool *inReg)
{
}
|
||
// Always answers `false`, regardless of `ty` and `byRef`.
bool need_private_copy(jl_value_t *ty, bool byRef)
{
    const bool needs_copy = false;
    return needs_copy;
}
|
||
// Map a Julia floating-point bits type onto the LLVM type of the same width.
//
// Returns T_float16 / T_float32 / T_float64 for an immutable, field-less
// datatype of size 2 / 4 / 8 bytes that satisfies jl_is_floattype, and NULL
// for everything else.
//
// Assumes jl_is_datatype(dt) && !jl_is_abstracttype(dt).
static Type *get_llvm_fptype(jl_datatype_t *dt)
{
    if (dt->mutabl || jl_datatype_nfields(dt) != 0)
        return NULL;

    // Pick the candidate by size before asking whether this is a float type
    // at all, since the size comparison is the cheaper of the two tests.
    Type *candidate = NULL;
    if (dt->size == 2)
        candidate = T_float16;
    else if (dt->size == 4)
        candidate = T_float32;
    else if (dt->size == 8)
        candidate = T_float64;
    else
        return NULL;

    if (!jl_is_floattype((jl_value_t*)dt))
        return NULL;
    return candidate;
}
|
||
static size_t isLegalHA(jl_datatype_t *dt, Type *&base); | ||
|
||
// Check whether a type contained by a candidate homogeneous aggregate is valid. | ||
// Returns the corresponding LLVM type. | ||
static Type *isLegalHAType(jl_datatype_t *dt) | ||
{ | ||
// single- or double-precision floating-point type | ||
Type* fp = get_llvm_fptype(dt); | ||
if (fp) | ||
return fp; | ||
|
||
// NOT SUPPORTED: 64- or 128-bit containerized vectors | ||
|
||
// recursive application (composite types can contain other composites) | ||
Type *base = NULL; | ||
if (isLegalHA(dt, base)) | ||
return base; | ||
|
||
return NULL; | ||
} | ||
|
||
// Check whether a type is a legal homogeneous aggregate. Returns the number of | ||
// members. | ||
static size_t isLegalHA(jl_datatype_t *dt, Type *&base) { | ||
// Homogeneous aggregates are only used for VFP registers, | ||
// so use that definition of legality (section 6.1.2.1) | ||
|
||
if (jl_is_structtype(dt)) { | ||
// ... with one to four Elements. | ||
size_t members = jl_datatype_nfields(dt); | ||
if (members < 1 || members > 4) | ||
return 0; | ||
|
||
base = NULL; | ||
for (size_t i = 0; i < members; ++i) { | ||
Type *T = isLegalHAType((jl_datatype_t*)jl_field_type(dt,i)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. And I believe the members counting is wrong for recursive types. |
||
if (!T) | ||
return 0; | ||
|
||
if (!base) | ||
base = T; | ||
else if (base != T) | ||
return 0; | ||
} | ||
return members; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
// Decide whether a value of type `dt` is a Co-processor Register Candidate,
// i.e. can be passed through VFP registers.
//
// Sets `*vfp` to true when it is; otherwise `*vfp` is left untouched, so the
// caller must initialise it to `false`.
static void classify_cprc(jl_datatype_t *dt, bool *vfp)
{
    // Based on section 6.1 of the Procedure Call Standard

    // VFP: 6.1.2.1
    // - A half-precision floating-point type.
    // - A single-precision floating-point type.
    // - A double-precision floating-point type.
    // - A Homogeneous Aggregate
    //
    // NOT SUPPORTED: A 64-bit or 128-bit containerized vector type.
    //
    // Review discussion on the homogeneous-aggregate case: nested composites
    // matter here (see section 4.3). With clang, a struct wrapping
    // { double, double } is returned directly as a value, while a struct
    // wrapping { double, float } is returned through an sret pointer.
    Type *ha_base = NULL;
    if (get_llvm_fptype(dt) || isLegalHA(dt, ha_base))
        *vfp = true;
}
|
||
// Classify how a value of type `ty` is returned.
//
// Out parameters are only ever set to true, so the caller must initialise
// all three to `false`:
//   *reg          - the value is returned in registers
//   *onstack      - the value is returned in caller-provided memory
//   *need_rewrite - the type is treated as a composite aggregate and has to
//                   be rewritten
//
// Assumes jl_is_datatype(ty) && !jl_is_abstracttype(ty).
static void classify_return_arg(jl_value_t *ty, bool *reg,
                                bool *onstack, bool *need_rewrite)
{
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    // Based on section 5.4 of the Procedure Call Standard

    // VFP standard variant: see 6.1.2.2
    //   Any result whose type would satisfy the conditions for a VFP CPRC is
    //   returned in the appropriate number of consecutive VFP registers
    //   starting with the lowest numbered register (s0, d0, q0).
    classify_cprc(dt, reg);
    if (*reg)
        return;

    // Results returned in core registers:
    // - A Half-precision Floating Point Type is returned in the least
    //   significant 16 bits of r0.
    //   NOTE(review): reviewers could not confirm how clang/gcc handle this
    //   case -- TODO verify.
    // - A Fundamental Data Type that is smaller than 4 bytes is zero- or
    //   sign-extended to a word and returned in r0.
    // - A double-word sized Fundamental Data Type (e.g., long long, double and
    //   64-bit containerized vectors) is returned in r0 and r1.
    // - A word-sized Fundamental Data Type (eg., int, float) is returned in r0.
    // NOTE: assuming "fundamental type" == jl_is_bitstype, might need exact def
    if (dt == jl_float16_type || (jl_is_bitstype(dt) && dt->size <= 8)) {
        *reg = true;
        return;
    }

    // If we ever support containerized vectors on an ARMv7 without VFP,
    // these can be returned in r0-r3 as well.

    // NOTE: there is no jl_is_structtype check from here on, because at this
    //       point everything will be rewritten to look like a composite
    //       aggregate.
    *need_rewrite = true;

    // - A Composite Type not larger than 4 bytes is returned in r0. The format
    //   is as if the result had been stored in memory at a word-aligned address
    //   and then loaded into r0 with an LDR instruction. Any bits in r0 that
    //   lie outside the bounds of the result have unspecified values.
    // - A Composite Type larger than 4 bytes, or whose size cannot be
    //   determined statically by both caller and callee, is stored in memory at
    //   an address passed as an extra argument when the function was called
    //   (§5.5, rule A.4). The memory to be used for the result may be modified
    //   at any point during the function call.
    bool *destination = (dt->size <= 4) ? reg : onstack;
    *destination = true;
}
|
||
// Report whether a return value of type `ty` has to be returned in memory,
// i.e. whether classify_return_arg flags it as `onstack`.
//
// `state` is not used by this implementation.
//
// Assumes jl_is_datatype(ty) && !jl_is_abstracttype(ty).
bool use_sret(AbiState *state, jl_value_t *ty)
{
    // classify_return_arg only ever sets its out parameters to true, so they
    // all have to start out as false.
    bool reg = false;
    bool onstack = false;
    bool need_rewrite = false;
    classify_return_arg(ty, &reg, &onstack, &need_rewrite);

    return onstack;
}
|
||
// Classify how an argument of type `ty` is passed.
//
// Out parameters are only ever set to true, so the caller must initialise
// all three to `false`:
//   *reg          - set when the argument is a co-processor register
//                   candidate, or a bits type of at most 8 bytes
//   *need_rewrite - set for everything else; such arguments have their type
//                   rewritten by the caller
//   *onstack      - never written by this function
//
// Assumes jl_is_datatype(ty) && !jl_is_abstracttype(ty).
static void classify_arg(jl_value_t *ty, bool *reg,
                         bool *onstack, bool *need_rewrite)
{
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    // Based on section 5.5 of the Procedure Call Standard

    // C.1.cp
    //   If the argument is a CPRC and there are sufficient unallocated
    //   co-processor registers of the appropriate class, the argument is
    //   allocated to co-processor registers.
    classify_cprc(dt, reg);
    if (*reg)
        return;

    // Fundamental types go in registers as they are; anything else has to
    // be rewritten.
    const bool is_fundamental = jl_is_bitstype(dt) && dt->size <= 8;
    if (is_fundamental)
        *reg = true;
    else
        *need_rewrite = true;
}
|
||
// Pick the LLVM type that should be used to pass (`isret == false`) or
// return (`isret == true`) a value of Julia type `ty`.
//
// Returns:
//   - NULL if `ty` is not a concrete datatype, or if classification says no
//     rewrite is needed;
//   - the matching LLVM floating-point type for floating-point bits types;
//   - otherwise an integer array `[n x iN]`, where N is 32 or 64 depending
//     on the clamped alignment and n covers the size of the type.
Type *preferred_llvm_type(jl_value_t *ty, bool isret)
{
    if (!jl_is_datatype(ty) || jl_is_abstracttype(ty))
        return NULL;
    jl_datatype_t *dt = (jl_datatype_t*)ty;

    if (Type *fptype = get_llvm_fptype(dt))
        return fptype;

    // The classifiers only ever set their out parameters to true.
    bool reg = false;
    bool onstack = false;
    bool need_rewrite = false;
    if (isret)
        classify_return_arg(ty, &reg, &onstack, &need_rewrite);
    else
        classify_arg(ty, &reg, &onstack, &need_rewrite);

    if (!need_rewrite)
        return NULL;

    // Based on section 4 of the Procedure Call Standard

    // If some type is illegal and needs to be rewritten,
    // represent it as an aggregate composite type.

    // 4.3.1: aggregates
    // - The alignment of an aggregate shall be the alignment of its
    //   most-aligned component.
    // - The size of an aggregate shall be the smallest multiple of its
    //   alignment that is sufficient to hold all of its members when they are
    //   laid out according to these rules.
    // 5.5 B.5
    //   For a Composite Type, the alignment of the copy will have 4-byte
    //   alignment if its natural alignment is <= 4 and 8-byte alignment if
    //   its natural alignment is >= 8
    size_t align = dt->alignment;
    if (align < 4)
        align = 4;
    if (align > 8)
        align = 8;

    // One integer element per `align` bytes, rounding the element count up
    // so the array covers the whole value.
    Type *T = Type::getIntNTy(getGlobalContext(), align*8);
    return ArrayType::get(T, (dt->size + align - 1) / align);
}
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I noticed this when fixing it for AArch64. I think you need to check `pointerfree` here. `haspadding` should be another field to check as a fast path.