diff --git a/base/boot.jl b/base/boot.jl
index faec93a47fe9a..2545be0b75713 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -125,7 +125,7 @@ export SimpleVector, AbstractArray, DenseArray,
     # special objects
     Function, LambdaInfo, Method, MethodTable, TypeMapEntry, TypeMapLevel,
-    Module, Symbol, Task, Array, WeakRef,
+    Module, Symbol, Task, Array, WeakRef, VecElement,
     # numeric types
     Number, Real, Integer, Bool, Ref, Ptr,
     AbstractFloat, Float16, Float32, Float64,
@@ -271,6 +271,10 @@ TypeConstructor(p::ANY, t::ANY) =
 
 Void() = nothing
 
+immutable VecElement{T}  # single-field wrapper; codegen emits it unwrapped, as the bare field's LLVM type
+    value::T
+end
+
 Expr(args::ANY...) = _expr(args...)
 
 _new(typ::Symbol, argty::Symbol) = eval(:((::Type{$typ})(n::$argty) = $(Expr(:new, typ, :n))))
diff --git a/src/alloc.c b/src/alloc.c
index 568c493b27442..da48b37aea30a 100644
--- a/src/alloc.c
+++ b/src/alloc.c
@@ -822,10 +822,44 @@ JL_DLLEXPORT jl_datatype_t *jl_new_uninitialized_datatype(size_t nfields, int8_t
     return t;
 }
 
+// Determine whether a homogeneous tuple whose fields all have type t gets
+// a special alignment beyond the normal Julia rules.
+// Return the special alignment if one exists, or 0 if the normal rules hold.
+// A non-zero result *must* match LLVM's alignment rules for the matching vector type.
+// For the sake of Ahead-Of-Time (AOT) compilation, this routine has to work
+// without LLVM being available.
+unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) {
+    if (!is_vecelement_type(t))
+        return 0;  // only tuples of VecElement are candidates for a special alignment
+    if (nfields>16 || (1<8 || (1<alignment; alignment*=2 )  // NOTE(review): this line looks truncated by text extraction (several statements appear fused) - confirm against the upstream patch
+        continue;
+    return alignment;
+}
+
 void jl_compute_field_offsets(jl_datatype_t *st)
 {
     size_t sz = 0, alignm = 1;
     int ptrfree = 1;
+    int homogeneous = 1;  // stays 1 while every field type seen so far is the same object
+    jl_value_t *lastty = NULL;  // type of the previously visited field; NULL before the first one
 
     assert(0 <= st->fielddesc_type && st->fielddesc_type <= 2);
 
@@ -862,12 +896,21 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             if (al > alignm)
                 alignm = al;
         }
+        homogeneous &= lastty==NULL || lastty==ty;  // cleared as soon as one field type differs from its predecessor
+        lastty = ty;
         jl_field_setoffset(st, i, sz);
         jl_field_setsize(st, i, fsz);
         if (__unlikely(max_offset - sz < fsz))
             jl_throw(jl_overflow_exception);
         sz += fsz;
     }
+    if (homogeneous && lastty!=NULL && jl_is_tuple_type(st)) {
+        // Some tuples become LLVM vectors with stronger alignment than what was calculated above.
+        unsigned al = jl_special_vector_alignment(jl_datatype_nfields(st), lastty);
+        assert(al % alignm == 0);  // al is either 0 (no special rule) or a multiple of the alignment computed above
+        if (al)
+            alignm = al;  // the special vector alignment replaces the field-derived one
+    }
     st->alignment = alignm;
     st->size = LLT_ALIGN(sz, alignm);
     if (st->size > sz)
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 9a24d784f91cb..7b4cca3dbf374 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -365,30 +365,37 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
             }
             std::vector latypes(0);
             size_t i;
+            bool isarray = true;  // true while all fields map to the same LLVM type
             bool isvector = true;
+            jl_value_t* jlasttype = NULL;  // Julia type of the previously visited field
             Type *lasttype = NULL;
             for(i = 0; i < ntypes; i++) {
                 jl_value_t *ty = jl_svecref(jst->types, i);
+                if (jlasttype!=NULL && ty!=jlasttype)
+                    isvector = false;  // fields differ in Julia type, so no LLVM vector
+                jlasttype = ty;
                 Type *lty;
                 if (jl_field_isptr(jst, i))
                     lty = T_pjlvalue;
                 else
                     lty = ty==(jl_value_t*)jl_bool_type ? T_int8 : julia_type_to_llvm(ty);
                 if (lasttype != NULL && lasttype != lty)
-                    isvector = false;
+                    isarray = false;  // fields differ in LLVM type, so no LLVM array either
                 lasttype = lty;
                 if (type_is_ghost(lty))
                     lty = NoopType;
                 latypes.push_back(lty);
             }
             if (!isTuple) {
-                structdecl->setBody(latypes);
+                if (is_vecelement_type(jt))
+                    // VecElement type is unwrapped in LLVM
+                    jst->struct_decl = latypes[0];
+                else
+                    structdecl->setBody(latypes);
             }
             else {
-                if (isvector && lasttype != T_int1 && !type_is_ghost(lasttype)) {
-                    // TODO: currently we get LLVM assertion failures for other vector sizes
-                    bool validVectorSize = (ntypes == 2 || ntypes == 4 || ntypes == 6);
-                    if (0 && lasttype->isSingleValueType() && !lasttype->isVectorTy() && validVectorSize) // currently disabled due to load/store alignment issues
+                if (isarray && lasttype != T_int1 && !type_is_ghost(lasttype)) {
+                    if (isvector && jl_special_vector_alignment(ntypes, jlasttype)!=0)  // mirrors the check made in jl_compute_field_offsets
                         jst->struct_decl = VectorType::get(lasttype, ntypes);
                     else
                         jst->struct_decl = ArrayType::get(lasttype, ntypes);
@@ -397,6 +404,12 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
                     jst->struct_decl = StructType::get(jl_LLVMContext,ArrayRef(&latypes[0],ntypes));
                 }
             }
+#ifndef NDEBUG
+            // If LLVM and Julia disagree about alignment, much trouble ensues, so check it!
+            unsigned llvm_alignment = jl_ExecutionEngine->getDataLayout().getABITypeAlignment((Type*)jst->struct_decl);
+            unsigned julia_alignment = jst->alignment;  // local variable; same name as the julia_alignment() helper added further down
+            assert(llvm_alignment==julia_alignment);
+#endif
         }
         return (Type*)jst->struct_decl;
     }
@@ -432,7 +445,7 @@ static bool is_tupletype_homogeneous(jl_svec_t *t)
 static bool deserves_sret(jl_value_t *dt, Type *T)
 {
     assert(jl_is_datatype(dt));
-    return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy();
+    return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy();  // LLVM vector types are now excluded from sret as well
 }
 
 // --- generating various field accessors ---
@@ -750,10 +763,28 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value *
 
 // --- loading and storing ---
 
+// Choose the alignment to attach to a load/store of a jltype value through ptr.
+// If the given alignment is 0 and jltype's own alignment exceeds MAX_ALIGN,
+// return MAX_ALIGN. Otherwise return the given alignment unchanged.
+//
+// Parameter ptr should be the pointer argument for the LoadInst or StoreInst.
+// It is currently unused, but might be used in the future for a more precise answer.
+static unsigned julia_alignment(Value* /*ptr*/, jl_value_t *jltype, unsigned alignment) {
+    if (!alignment && ((jl_datatype_t*)jltype)->alignment > MAX_ALIGN) {
+        // Type's natural alignment exceeds strictest alignment promised in heap, so return the heap alignment.
+        return MAX_ALIGN;
+    }
+    return alignment;
+}
+
+static LoadInst *build_load (Value *ptr, jl_value_t *jltype) {  // load through ptr with the alignment julia_alignment() picks for jltype
+    return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0));
+}
+
 static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt);
 
 static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
-                             jl_codectx_t *ctx, MDNode *tbaa, size_t alignment = 0)
+                             jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
 {
     bool isboxed;
     Type *elty = julia_type_to_llvm(jltype, &isboxed);
@@ -778,9 +809,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
     //    elt = data;
     //}
     //else {
-        if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
-            alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
-        Instruction *load = builder.CreateAlignedLoad(data, alignment, false);
+        Instruction *load = builder.CreateAlignedLoad(data, julia_alignment(data, jltype, alignment), false);
         if (tbaa) {
             elt = tbaa_decorate(tbaa, load);
         }
@@ -799,7 +828,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
 static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
                         jl_value_t *jltype, jl_codectx_t *ctx, MDNode *tbaa,
                         Value *parent, // for the write barrier, NULL if no barrier needed
-                        size_t alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
+                        unsigned alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
 {
     Type *elty = julia_type_to_llvm(jltype);
     assert(elty != NULL);
@@ -821,9 +850,7 @@ static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
         data = builder.CreateBitCast(ptr, PointerType::get(elty, 0));
     else
         data = ptr;
-    if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
-        alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
-    Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), alignment);
+    Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), julia_alignment(r, jltype, alignment));  // NOTE(review): passes the stored value r, not the store's pointer operand; harmless only while julia_alignment ignores its first argument
     if (tbaa)
         tbaa_decorate(tbaa, store);
 }
@@ -1026,18 +1053,20 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx,
             }
             else {
                 int align = jl_field_offset(jt,idx);
-                if (align & 1) align = 1;
-                else if (align & 2) align = 2;
-                else if (align & 4) align = 4;
-                else if (align & 8) align = 8;
-                else align = 16;
+                align |= 16;  // force the 16 bit on so the lowest set bit can never exceed 16
+                align &= -align;  // keep only the lowest set bit: 1, 2, 4, 8 or 16
                 return typed_load(addr, ConstantInt::get(T_size, 0), jfty, ctx, tbaa, align);
             }
         }
         else if (strct.ispointer) { // something stack allocated
-            Value *addr = builder.CreateConstInBoundsGEP2_32(
-                    LLVM37_param(julia_type_to_llvm(strct.typ))
-                    strct.V, 0, idx);
+            Value *addr;
+            if (is_vecelement_type((jl_value_t*)jt))
+                // VecElement types are unwrapped in LLVM.
+                addr = strct.V;
+            else
+                addr = builder.CreateConstInBoundsGEP2_32(
+                    LLVM37_param(julia_type_to_llvm(strct.typ))
+                    strct.V, 0, idx);
             assert(!jt->mutabl);
             jl_cgval_t fieldval = mark_julia_slot(addr, jfty);
             fieldval.isimmutable = strct.isimmutable;
@@ -1045,8 +1074,13 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx,
             return fieldval;
         }
         else {
-            assert(strct.V->getType()->isVectorTy());
-            fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
+            if (strct.V->getType()->isVectorTy()) {
+                fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
+            } else {
+                // VecElement types are unwrapped in LLVM.
+                assert( strct.V->getType()->isSingleValueType() );
+                fldv = strct.V;  // the value itself is the single field
+            }
             if (jfty == (jl_value_t*)jl_bool_type) {
                 fldv = builder.CreateTrunc(fldv, T_int1);
             }
@@ -1376,7 +1410,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
     assert(!type_is_ghost(t)); // should have been handled by isghost above!
 
     if (vinfo.ispointer)
-        v = builder.CreateLoad(builder.CreatePointerCast(v, t->getPointerTo()));
+        v = build_load( builder.CreatePointerCast(v, t->getPointerTo()), vinfo.typ );
 
     if (t == T_int1)
         return julia_bool(v);
@@ -1548,11 +1582,8 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id
         }
         else {
             int align = jl_field_offset(sty, idx0);
-            if (align & 1) align = 1;
-            else if (align & 2) align = 2;
-            else if (align & 4) align = 4;
-            else if (align & 8) align = 8;
-            else align = 16;
+            align |= 16;  // force the 16 bit on so the lowest set bit can never exceed 16
+            align &= -align;  // keep only the lowest set bit: 1, 2, 4, 8 or 16
             typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx, sty->mutabl ? tbaa_user : tbaa_immut, data_pointer(strct, ctx, T_pjlvalue), align);
         }
     }
@@ -1594,8 +1625,13 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
                         fval = builder.CreateZExt(fval, T_int8);
                     if (lt->isVectorTy())
                         strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
-                    else
+                    else if (lt->isAggregateType())
                         strct = builder.CreateInsertValue(strct, fval, ArrayRef(&idx,1));
+                    else {
+                        // Must be a VecElement type, which comes unwrapped in LLVM.
+                        assert(is_vecelement_type(ty));
+                        strct = fval;  // the struct value is just its single field
+                    }
                 }
                 idx++;
             }
diff --git a/src/codegen.cpp b/src/codegen.cpp
index f297a828538ed..e5ba4177ff5d9 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -3936,7 +3936,7 @@ static Function *gen_jlcall_wrapper(jl_lambda_info_t *lam, Function *f, bool sre
         if (lty != NULL && !isboxed) {
             theArg = builder.CreatePointerCast(theArg, PointerType::get(lty,0));
             if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = builder.CreateLoad(theArg);
+                theArg = build_load(theArg, ty);
         }
         assert(dyn_cast(theArg) == NULL);
         args[idx] = theArg;
diff --git a/src/init.c b/src/init.c
index 2602b1b72cfc4..7730e84f37e5a 100644
--- a/src/init.c
+++ b/src/init.c
@@ -833,6 +833,7 @@ void jl_get_builtin_hooks(void)
     jl_ascii_string_type = (jl_datatype_t*)core("ASCIIString");
     jl_utf8_string_type = (jl_datatype_t*)core("UTF8String");
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
+    jl_vecelement_typename = ((jl_datatype_t*)core("VecElement"))->name;  // typename that is_vecelement_type() compares against
 }
 
 JL_DLLEXPORT void jl_get_system_hooks(void)
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index e0d5e731d1768..f0088a8e0f70f 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -305,9 +305,13 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
     if (jt == (jl_value_t*)jl_bool_type)
         return builder.CreateZExt(builder.CreateTrunc(builder.CreateLoad(p), T_int1), to);
 
-    if (!x.isboxed) // stack has default alignment
+    if (x.isboxed)
+        return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
+    else if (jt)  // unboxed and the Julia type is known: build_load chooses the alignment
+        return build_load(p, jt);
+    else
+        // stack has default alignment
         return builder.CreateLoad(p);
-    return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
 }
 
 // unbox, trying to determine correct bitstype automatically
diff --git a/src/jltypes.c b/src/jltypes.c
index 89fa755777e5d..e6ccb247d8eb6 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -35,6 +35,7 @@ jl_tupletype_t *jl_anytuple_type;
 jl_datatype_t *jl_anytuple_type_type;
 jl_datatype_t *jl_ntuple_type;
 jl_typename_t *jl_ntuple_typename;
+jl_typename_t *jl_vecelement_typename;  // assigned in jl_get_builtin_hooks()
 jl_datatype_t *jl_vararg_type;
 jl_datatype_t *jl_tvar_type;
 jl_datatype_t *jl_uniontype_type;
diff --git a/src/julia.h b/src/julia.h
index 2d204df89aba3..1aff94c5940aa 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -441,6 +441,7 @@ extern JL_DLLEXPORT jl_datatype_t *jl_slotnumber_type;
 extern JL_DLLEXPORT jl_datatype_t *jl_typedslot_type;
 extern JL_DLLEXPORT jl_datatype_t *jl_simplevector_type;
 extern JL_DLLEXPORT jl_typename_t *jl_tuple_typename;
+extern JL_DLLEXPORT jl_typename_t *jl_vecelement_typename;
 extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type;
 #define jl_tuple_type jl_anytuple_type
 extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type_type;
@@ -940,6 +941,12 @@ STATIC_INLINE int jl_is_tuple_type(void *t)
             ((jl_datatype_t*)(t))->name == jl_tuple_typename);
 }
 
+STATIC_INLINE int is_vecelement_type(jl_value_t* t)  // nonzero iff t is a datatype whose typename is jl_vecelement_typename
+{
+    return (jl_is_datatype(t) &&
+            ((jl_datatype_t*)(t))->name == jl_vecelement_typename);
+}
+
 STATIC_INLINE int jl_is_vararg_type(jl_value_t *v)
 {
     return (jl_is_datatype(v) &&
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 8f828a4927a16..e4ae724743b03 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -585,6 +585,7 @@ int sigs_eq(jl_value_t *a, jl_value_t *b, int useenv);
 
 jl_value_t *jl_lookup_match(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, jl_svec_t *tvars);
+unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);  // defined in src/alloc.c; returns 0 when no special vector alignment applies
 
 #ifdef __cplusplus
 }
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 892a57feeb830..0864267316921 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -19,7 +19,7 @@ function choosetests(choices = [])
         "char", "string", "triplequote", "unicode",
         "dates", "dict", "hashing", "remote", "iobuffer", "staged",
         "arrayops", "tuple", "subarray", "reduce", "reducedim", "random",
-        "abstractarray", "intfuncs", "simdloop", "blas", "sparse",
+        "abstractarray", "intfuncs", "simdloop", "vecelement", "blas", "sparse",
         "bitarray", "copy", "math", "fastmath", "functional",
         "operators", "path", "ccall", "parse", "loading", "bigint",
         "bigfloat", "sorting", "statistics", "spawn", "backtrace",
diff --git a/test/vecelement.jl b/test/vecelement.jl
new file mode 100644
index 0000000000000..d22984590bfbb
--- /dev/null
+++ b/test/vecelement.jl
@@ -0,0 +1,65 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+
+make_value{T<:Integer}(::Type{T}, i::Integer) = 3*i%T  # 3*i reduced into T with %
+make_value{T<:AbstractFloat}(::Type{T},i::Integer) = T(3*i)  # 3*i converted to T
+
+typealias Vec{N,T} NTuple{N,Base.VecElement{T}}
+
+# Crash report for #15244 motivated this test.
+@generated function thrice_iota{N,T}(::Type{Vec{N,T}})  # builds the N-tuple (VecElement(make_value(T,1)), ..., VecElement(make_value(T,N)))
+    :(tuple($([:(Base.VecElement(make_value($T,$i))) for i in 1:N]...)))
+end
+
+function call_iota(n::Integer,t::DataType)  # checks the first and last element of the generated tuple
+    x = thrice_iota(Vec{n,t})
+    @test x[1].value === make_value(t,1)
+    @test x[n].value === make_value(t,n)
+end
+
+# Try various tuple lengths and element types
+for i=1:20
+    for t in [Bool, Int8, Int16, Int32, Int64, Float32, Float64]
+        call_iota(i,t)
+    end
+end
+
+# Another crash report for #15244 motivated this test.
+immutable Bunch{N,T}  # struct whose only field is a homogeneous tuple of N VecElement{T}
+    elts::NTuple{N,Base.VecElement{T}}
+end
+
+unpeel(x) = x.elts[1].value  # read the first element's payload through the struct field
+
+@test unpeel(Bunch{2,Float64}((Base.VecElement(5.0),
+                               Base.VecElement(4.0)))) === 5.0
+
+rewrap(x) = VecElement(x.elts[1].value+0)  # unwrap, do arithmetic, and wrap again
+b = Bunch((VecElement(1.0), VecElement(2.0)))
+
+@test rewrap(b)===VecElement(1.0)
+
+immutable Herd{N,T}  # like Bunch, but the inner constructor wraps a plain NTuple{N,T}
+    elts::NTuple{N,Base.VecElement{T}}
+    Herd(elts::NTuple{N,T}) = new(ntuple(i->Base.VecElement{T}(elts[i]), N))
+end
+
+function check{N,T}(x::Herd{N,T})  # expects element i to hold N*N+i-1
+    for i=1:N
+        @test x.elts[i].value === N*N+i-1
+    end
+end
+
+check(Herd{1,Int}((1,)))
+check(Herd{2,Int}((4,5)))
+check(Herd{4,Int}((16,17,18,19)))
+
+immutable Gr{N, T}  # a Bunch placed between two scalar fields
+    u::T
+    v::Bunch{N,T}
+    w::T
+end
+
+a = Vector{Gr{2,Float64}}(2)
+a[2] = Gr(1.0, Bunch((VecElement(2.0), VecElement(3.0))), 4.0)
+a[1] = Gr(5.0, Bunch((VecElement(6.0), VecElement(7.0))), 8.0)  # stored after a[2]; the test below re-reads a[2]
+@test a[2] == Gr(1.0, Bunch((VecElement(2.0), VecElement(3.0))), 4.0)