Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Inverted marking of vector tuple type #15244

Merged
merged 1 commit into from
Apr 30, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion base/boot.jl
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ export
SimpleVector, AbstractArray, DenseArray,
# special objects
Function, LambdaInfo, Method, MethodTable, TypeMapEntry, TypeMapLevel,
Module, Symbol, Task, Array, WeakRef,
Module, Symbol, Task, Array, WeakRef, VecElement,
# numeric types
Number, Real, Integer, Bool, Ref, Ptr,
AbstractFloat, Float16, Float32, Float64,
Expand Down Expand Up @@ -271,6 +271,10 @@ TypeConstructor(p::ANY, t::ANY) =

Void() = nothing

# Single-field immutable wrapper used to mark the element type of a vector-like tuple.
# NOTE(review): the runtime appears to give homogeneous tuples of VecElement
# a special (LLVM vector) alignment and layout -- confirm against src/alloc.c.
immutable VecElement{T}
value::T
end

Expr(args::ANY...) = _expr(args...)

_new(typ::Symbol, argty::Symbol) = eval(:((::Type{$typ})(n::$argty) = $(Expr(:new, typ, :n))))
Expand Down
43 changes: 43 additions & 0 deletions src/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -822,10 +822,44 @@ JL_DLLEXPORT jl_datatype_t *jl_new_uninitialized_datatype(size_t nfields, int8_t
return t;
}

// Special-alignment query for a homogeneous tuple whose fields all have type t.
//
// Returns 0 when the ordinary Julia alignment rules apply, otherwise the
// alignment (in bytes) that the tuple must have. A non-zero answer *must*
// agree with what LLVM uses for the vector type <nfields x t>.
// This routine deliberately avoids any dependency on LLVM so that it keeps
// working for Ahead-Of-Time (AOT) compilation.
unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
{
    // Bit i is set iff i is an accepted field count: 2, 3, 4, 5, 6, 8, 10, 12, 16.
    const int accepted_lengths = 0x1157C;
    // Bit i is set iff i is an accepted element size in bytes: 1, 2, 4, 8.
    const int accepted_elsizes = 0x116;

    // Only tuples of VecElement are candidates for vector treatment.
    if (!is_vecelement_type(t))
        return 0;

    // Guard the shift with the range check before consulting the bitmask.
    if (nfields > 16 || ((1 << nfields) & accepted_lengths) == 0)
        return 0;

    assert(jl_datatype_nfields(t)==1);
    jl_value_t *elty = jl_field_type(t, 0);

    // LLVM requires that a vector element be a primitive type.
    // LLVM allows pointer types as vector elements, but until a
    // motivating use case comes up for Julia, we reject pointers.
    if (!jl_is_bitstype(elty))
        return 0;

    size_t elbytes = jl_datatype_size(elty);
    if (elbytes > 8 || ((1 << elbytes) & accepted_elsizes) == 0)
        return 0;

    // LLVM's alignment rule for vectors seems to be to round the total size
    // up to a power of two, even if that's overkill for the target hardware.
    size_t total = nfields * elbytes;
    size_t pow2 = 1;
    while (pow2 < total)
        pow2 *= 2;
    return pow2;
}

void jl_compute_field_offsets(jl_datatype_t *st)
{
size_t sz = 0, alignm = 1;
int ptrfree = 1;
int homogeneous = 1;
jl_value_t *lastty = NULL;

assert(0 <= st->fielddesc_type && st->fielddesc_type <= 2);

Expand Down Expand Up @@ -862,12 +896,21 @@ void jl_compute_field_offsets(jl_datatype_t *st)
if (al > alignm)
alignm = al;
}
homogeneous &= lastty==NULL || lastty==ty;
lastty = ty;
jl_field_setoffset(st, i, sz);
jl_field_setsize(st, i, fsz);
if (__unlikely(max_offset - sz < fsz))
jl_throw(jl_overflow_exception);
sz += fsz;
}
if (homogeneous && lastty!=NULL && jl_is_tuple_type(st)) {
// Some tuples become LLVM vectors with stronger alignment than what was calculated above.
unsigned al = jl_special_vector_alignment(jl_datatype_nfields(st), lastty);
assert(al % alignm == 0);
if (al)
alignm = al;
}
st->alignment = alignm;
st->size = LLT_ALIGN(sz, alignm);
if (st->size > sz)
Expand Down
100 changes: 68 additions & 32 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,30 +365,37 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
}
std::vector<Type*> latypes(0);
size_t i;
bool isarray = true;
bool isvector = true;
jl_value_t* jlasttype = NULL;
Type *lasttype = NULL;
for(i = 0; i < ntypes; i++) {
jl_value_t *ty = jl_svecref(jst->types, i);
if (jlasttype!=NULL && ty!=jlasttype)
isvector = false;
jlasttype = ty;
Type *lty;
if (jl_field_isptr(jst, i))
lty = T_pjlvalue;
else
lty = ty==(jl_value_t*)jl_bool_type ? T_int8 : julia_type_to_llvm(ty);
if (lasttype != NULL && lasttype != lty)
isvector = false;
isarray = false;
lasttype = lty;
if (type_is_ghost(lty))
lty = NoopType;
latypes.push_back(lty);
}
if (!isTuple) {
structdecl->setBody(latypes);
if (is_vecelement_type(jt))
// VecElement type is unwrapped in LLVM
jst->struct_decl = latypes[0];
else
structdecl->setBody(latypes);
}
else {
if (isvector && lasttype != T_int1 && !type_is_ghost(lasttype)) {
// TODO: currently we get LLVM assertion failures for other vector sizes
bool validVectorSize = (ntypes == 2 || ntypes == 4 || ntypes == 6);
if (0 && lasttype->isSingleValueType() && !lasttype->isVectorTy() && validVectorSize) // currently disabled due to load/store alignment issues
if (isarray && lasttype != T_int1 && !type_is_ghost(lasttype)) {
if (isvector && jl_special_vector_alignment(ntypes, jlasttype)!=0)
jst->struct_decl = VectorType::get(lasttype, ntypes);
else
jst->struct_decl = ArrayType::get(lasttype, ntypes);
Expand All @@ -397,6 +404,12 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
jst->struct_decl = StructType::get(jl_LLVMContext,ArrayRef<Type*>(&latypes[0],ntypes));
}
}
#ifndef NDEBUG
// If LLVM and Julia disagree about alignment, much trouble ensues, so check it!
unsigned llvm_alignment = jl_ExecutionEngine->getDataLayout().getABITypeAlignment((Type*)jst->struct_decl);
unsigned julia_alignment = jst->alignment;
assert(llvm_alignment==julia_alignment);
#endif
}
return (Type*)jst->struct_decl;
}
Expand Down Expand Up @@ -432,7 +445,7 @@ static bool is_tupletype_homogeneous(jl_svec_t *t)
static bool deserves_sret(jl_value_t *dt, Type *T)
{
assert(jl_is_datatype(dt));
return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy();
return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy();
}

// --- generating various field accessors ---
Expand Down Expand Up @@ -750,10 +763,28 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value *

// --- loading and storing ---

// Pick the alignment to attach to a load/store of a value of Julia type jltype.
//
// - A non-zero `alignment` supplied by the caller is returned unchanged.
// - If `alignment` is 0 and the alignment recorded for jltype exceeds MAX_ALIGN,
//   MAX_ALIGN is returned: the type's natural alignment is stricter than the
//   strictest alignment promised in the heap, so the heap alignment is used
//   rather than letting LLVM assume the type's own alignment.
// - Otherwise 0 is returned, leaving the choice of alignment to LLVM.
//
// Parameter ptr should be the pointer argument for the LoadInst or StoreInst.
// It is currently unused, but might be used in the future for a more precise answer.
static unsigned julia_alignment(Value* /*ptr*/, jl_value_t *jltype, unsigned alignment) {
    // An explicit request from the caller always wins.
    if (alignment != 0)
        return alignment;
    // Type's natural alignment exceeds strictest alignment promised in heap, so return the heap alignment.
    if (((jl_datatype_t*)jltype)->alignment > MAX_ALIGN)
        return MAX_ALIGN;
    return 0;
}

// Emit a load through ptr for a value of Julia type jltype, with the
// alignment chosen by julia_alignment when no explicit alignment is given.
static LoadInst *build_load (Value *ptr, jl_value_t *jltype)
{
    const unsigned load_align = julia_alignment(ptr, jltype, 0);
    return builder.CreateAlignedLoad(ptr, load_align);
}

static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt);

static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
jl_codectx_t *ctx, MDNode *tbaa, size_t alignment = 0)
jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
{
bool isboxed;
Type *elty = julia_type_to_llvm(jltype, &isboxed);
Expand All @@ -778,9 +809,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
// elt = data;
//}
//else {
if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
Instruction *load = builder.CreateAlignedLoad(data, alignment, false);
Instruction *load = builder.CreateAlignedLoad(data, julia_alignment(data, jltype, alignment), false);
if (tbaa) {
elt = tbaa_decorate(tbaa, load);
}
Expand All @@ -799,7 +828,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
jl_value_t *jltype, jl_codectx_t *ctx, MDNode *tbaa,
Value *parent, // for the write barrier, NULL if no barrier needed
size_t alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
unsigned alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
{
Type *elty = julia_type_to_llvm(jltype);
assert(elty != NULL);
Expand All @@ -821,9 +850,7 @@ static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
data = builder.CreateBitCast(ptr, PointerType::get(elty, 0));
else
data = ptr;
if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), alignment);
Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), julia_alignment(r, jltype, alignment));
if (tbaa)
tbaa_decorate(tbaa, store);
}
Expand Down Expand Up @@ -1026,27 +1053,34 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx,
}
else {
int align = jl_field_offset(jt,idx);
if (align & 1) align = 1;
else if (align & 2) align = 2;
else if (align & 4) align = 4;
else if (align & 8) align = 8;
else align = 16;
align |= 16;
align &= -align;
return typed_load(addr, ConstantInt::get(T_size, 0), jfty, ctx, tbaa, align);
}
}
else if (strct.ispointer) { // something stack allocated
Value *addr = builder.CreateConstInBoundsGEP2_32(
LLVM37_param(julia_type_to_llvm(strct.typ))
strct.V, 0, idx);
Value *addr;
if (is_vecelement_type((jl_value_t*)jt))
// VecElement types are unwrapped in LLVM.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This branch doesn't make sense to me. Since ispointer is set, the value is expected to be a T*.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, never mind. Because it's a VecElement, we know that idx is 0 (it has one element), in which case we would be emitting gep V, 0, 0, and can simply drop it as a no-op.

addr = strct.V;
else
addr = builder.CreateConstInBoundsGEP2_32(
LLVM37_param(julia_type_to_llvm(strct.typ))
strct.V, 0, idx);
assert(!jt->mutabl);
jl_cgval_t fieldval = mark_julia_slot(addr, jfty);
fieldval.isimmutable = strct.isimmutable;
fieldval.gcroot = strct.gcroot;
return fieldval;
}
else {
assert(strct.V->getType()->isVectorTy());
fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
if (strct.V->getType()->isVectorTy()) {
fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
} else {
// VecElement types are unwrapped in LLVM.
assert( strct.V->getType()->isSingleValueType() );
fldv = strct.V;
}
if (jfty == (jl_value_t*)jl_bool_type) {
fldv = builder.CreateTrunc(fldv, T_int1);
}
Expand Down Expand Up @@ -1376,7 +1410,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
assert(!type_is_ghost(t)); // should have been handled by isghost above!

if (vinfo.ispointer)
v = builder.CreateLoad(builder.CreatePointerCast(v, t->getPointerTo()));
v = build_load( builder.CreatePointerCast(v, t->getPointerTo()), vinfo.typ );

if (t == T_int1)
return julia_bool(v);
Expand Down Expand Up @@ -1548,11 +1582,8 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id
}
else {
int align = jl_field_offset(sty, idx0);
if (align & 1) align = 1;
else if (align & 2) align = 2;
else if (align & 4) align = 4;
else if (align & 8) align = 8;
else align = 16;
align |= 16;
align &= -align;
typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx, sty->mutabl ? tbaa_user : tbaa_immut, data_pointer(strct, ctx, T_pjlvalue), align);
}
}
Expand Down Expand Up @@ -1594,8 +1625,13 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
fval = builder.CreateZExt(fval, T_int8);
if (lt->isVectorTy())
strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
else
else if (lt->isAggregateType())
strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
else {
// Must be a VecElement type, which comes unwrapped in LLVM.
assert(is_vecelement_type(ty));
strct = fval;
}
}
idx++;
}
Expand Down
2 changes: 1 addition & 1 deletion src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3936,7 +3936,7 @@ static Function *gen_jlcall_wrapper(jl_lambda_info_t *lam, Function *f, bool sre
if (lty != NULL && !isboxed) {
theArg = builder.CreatePointerCast(theArg, PointerType::get(lty,0));
if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
theArg = builder.CreateLoad(theArg);
theArg = build_load(theArg, ty);
}
assert(dyn_cast<UndefValue>(theArg) == NULL);
args[idx] = theArg;
Expand Down
1 change: 1 addition & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -833,6 +833,7 @@ void jl_get_builtin_hooks(void)
jl_ascii_string_type = (jl_datatype_t*)core("ASCIIString");
jl_utf8_string_type = (jl_datatype_t*)core("UTF8String");
jl_weakref_type = (jl_datatype_t*)core("WeakRef");
jl_vecelement_typename = ((jl_datatype_t*)core("VecElement"))->name;
}

JL_DLLEXPORT void jl_get_system_hooks(void)
Expand Down
8 changes: 6 additions & 2 deletions src/intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,13 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
if (jt == (jl_value_t*)jl_bool_type)
return builder.CreateZExt(builder.CreateTrunc(builder.CreateLoad(p), T_int1), to);

if (!x.isboxed) // stack has default alignment
if (x.isboxed)
return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did @jrevels' recent PR change this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR only changes the array alignment and the GC only gives 16-byte aligned addresses for large enough objects.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the alignment here still too large for small objects on 32-bit?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the alignment is too large if GC is not guaranteeing 16-byte alignment for small objects. For that matter, providing an alignment bigger than the object seems unlikely to provide any benefit, except for a few highly contrived cases. (Exercise left to the student :-). Can someone provide a concise summary of the GC alignment guarantees? E.g., a function that maps "object size" x "object LLVM alignment" -> "min guaranteed alignment in Julia heap"?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On 64-bit I believe we guarantee 16-byte alignment for GC-allocated memory for all objects except singletons (although you don't really need to load from those...)

On 32-bit, we guarantee at least 4-byte alignment for objects smaller than 8 bytes; for anything larger than or equal to 8 bytes, we should have 16-byte alignment. Objects with a non-zero size no larger than 4 bytes should also always be 8-byte aligned.....

else if (jt)
return build_load(p, jt);
else
// stack has default alignment
return builder.CreateLoad(p);
return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
}

// unbox, trying to determine correct bitstype automatically
Expand Down
1 change: 1 addition & 0 deletions src/jltypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ jl_tupletype_t *jl_anytuple_type;
jl_datatype_t *jl_anytuple_type_type;
jl_datatype_t *jl_ntuple_type;
jl_typename_t *jl_ntuple_typename;
jl_typename_t *jl_vecelement_typename;
jl_datatype_t *jl_vararg_type;
jl_datatype_t *jl_tvar_type;
jl_datatype_t *jl_uniontype_type;
Expand Down
7 changes: 7 additions & 0 deletions src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,7 @@ extern JL_DLLEXPORT jl_datatype_t *jl_slotnumber_type;
extern JL_DLLEXPORT jl_datatype_t *jl_typedslot_type;
extern JL_DLLEXPORT jl_datatype_t *jl_simplevector_type;
extern JL_DLLEXPORT jl_typename_t *jl_tuple_typename;
extern JL_DLLEXPORT jl_typename_t *jl_vecelement_typename;
extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type;
#define jl_tuple_type jl_anytuple_type
extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type_type;
Expand Down Expand Up @@ -940,6 +941,12 @@ STATIC_INLINE int jl_is_tuple_type(void *t)
((jl_datatype_t*)(t))->name == jl_tuple_typename);
}

// Returns 1 when t is a datatype whose typename is jl_vecelement_typename,
// and 0 otherwise (including when t is not a datatype at all).
STATIC_INLINE int is_vecelement_type(jl_value_t* t)
{
    if (!jl_is_datatype(t))
        return 0;
    return ((jl_datatype_t*)(t))->name == jl_vecelement_typename;
}

STATIC_INLINE int jl_is_vararg_type(jl_value_t *v)
{
return (jl_is_datatype(v) &&
Expand Down
1 change: 1 addition & 0 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ int sigs_eq(jl_value_t *a, jl_value_t *b, int useenv);

jl_value_t *jl_lookup_match(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, jl_svec_t *tvars);

unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);

#ifdef __cplusplus
}
Expand Down
2 changes: 1 addition & 1 deletion test/choosetests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function choosetests(choices = [])
"char", "string", "triplequote", "unicode",
"dates", "dict", "hashing", "remote", "iobuffer", "staged",
"arrayops", "tuple", "subarray", "reduce", "reducedim", "random",
"abstractarray", "intfuncs", "simdloop", "blas", "sparse",
"abstractarray", "intfuncs", "simdloop", "vecelement", "blas", "sparse",
"bitarray", "copy", "math", "fastmath", "functional",
"operators", "path", "ccall", "parse", "loading", "bigint",
"bigfloat", "sorting", "statistics", "spawn", "backtrace",
Expand Down
Loading