JuliaLang · ArchRobison · Apr 30, 2016 · Feb 25, 2016 · vtjnash · Apr 29, 2016
diff --git a/base/boot.jl b/base/boot.jl
@@ -125,7 +125,7 @@ export
     SimpleVector, AbstractArray, DenseArray,
     # special objects
     Function, LambdaInfo, Method, MethodTable, TypeMapEntry, TypeMapLevel,
-    Module, Symbol, Task, Array, WeakRef,
+    Module, Symbol, Task, Array, WeakRef, VecElement,
     # numeric types
     Number, Real, Integer, Bool, Ref, Ptr,
     AbstractFloat, Float16, Float32, Float64,
@@ -271,6 +271,10 @@ TypeConstructor(p::ANY, t::ANY) =
 
 Void() = nothing
 
+immutable VecElement{T}
+    value::T  # the single wrapped value; the type has no other fields
+end
+
 Expr(args::ANY...) = _expr(args...)
 
 _new(typ::Symbol, argty::Symbol) = eval(:((::Type{$typ})(n::$argty) = $(Expr(:new, typ, :n))))

diff --git a/src/alloc.c b/src/alloc.c
@@ -822,10 +822,44 @@ JL_DLLEXPORT jl_datatype_t *jl_new_uninitialized_datatype(size_t nfields, int8_t
     return t;
 }
 
+// Return the special alignment of a homogeneous tuple with nfields fields,
+// each of type t, or 0 when the normal Julia alignment rules apply.
+// Only tuples of VecElement{T}, with T a bitstype of size 1, 2, 4, or 8 and a
+// supported field count, get one. A non-zero result *must* match the LLVM
+// rules for a vector type <nfields x t>. For the sake of Ahead-Of-Time (AOT)
+// compilation, this routine has to work without LLVM being available.
+unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t) {
+    if (!is_vecelement_type(t))
+        return 0;
+    if (nfields>16 || (1<<nfields & 0x1157C) == 0)
+        // Mask 0x1157C has bits 2, 3, 4, 5, 6, 8, 10, 12, 16 set; nfields is none of those.
+        return 0;
+    assert(jl_datatype_nfields(t)==1); // a VecElement wraps exactly one field
+    jl_value_t *ty = jl_field_type(t, 0);
+    if( !jl_is_bitstype(ty) )
+        // LLVM requires that a vector element be a primitive type.
+        // LLVM allows pointer types as vector elements, but until a
+        // motivating use case comes up for Julia, we reject pointers.
+        return 0;
+    size_t elsz = jl_datatype_size(ty);
+    if (elsz>8 || (1<<elsz & 0x116) == 0)
+        // Mask 0x116 has bits 1, 2, 4, 8 set; element size is not 1, 2, 4, or 8.
+        return 0;
+    size_t size = nfields*elsz;
+    // LLVM's alignment rule for vectors seems to be to round the total size up
+    // to a power of two, even if that's overkill for the target hardware.
+    size_t alignment=1;
+    for( ; size>alignment; alignment*=2 )
+        continue;
+    return alignment;
+}
+
 void jl_compute_field_offsets(jl_datatype_t *st)
 {
     size_t sz = 0, alignm = 1;
     int ptrfree = 1;
+    int homogeneous = 1;
+    jl_value_t *lastty = NULL;
 
     assert(0 <= st->fielddesc_type && st->fielddesc_type <= 2);
 
@@ -862,12 +896,21 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             if (al > alignm)
                 alignm = al;
         }
+        homogeneous &= lastty==NULL || lastty==ty;
+        lastty = ty;
         jl_field_setoffset(st, i, sz);
         jl_field_setsize(st, i, fsz);
         if (__unlikely(max_offset - sz < fsz))
             jl_throw(jl_overflow_exception);
         sz += fsz;
     }
+    if (homogeneous && lastty!=NULL && jl_is_tuple_type(st)) {
+        // Some tuples become LLVM vectors with stronger alignment than what was calculated above.
+        unsigned al = jl_special_vector_alignment(jl_datatype_nfields(st), lastty);
+        assert(al % alignm == 0);
+        if (al)
+            alignm = al;
+    }
     st->alignment = alignm;
     st->size = LLT_ALIGN(sz, alignm);
     if (st->size > sz)

diff --git a/src/cgutils.cpp b/src/cgutils.cpp
@@ -365,30 +365,37 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
             }
             std::vector<Type*> latypes(0);
             size_t i;
+            bool isarray = true;
             bool isvector = true;
+            jl_value_t* jlasttype = NULL;
             Type *lasttype = NULL;
             for(i = 0; i < ntypes; i++) {
                 jl_value_t *ty = jl_svecref(jst->types, i);
+                if (jlasttype!=NULL && ty!=jlasttype)
+                    isvector = false;
+                jlasttype = ty;
                 Type *lty;
                 if (jl_field_isptr(jst, i))
                     lty = T_pjlvalue;
                 else
                     lty = ty==(jl_value_t*)jl_bool_type ? T_int8 : julia_type_to_llvm(ty);
                 if (lasttype != NULL && lasttype != lty)
-                    isvector = false;
+                    isarray = false;
                 lasttype = lty;
                 if (type_is_ghost(lty))
                     lty = NoopType;
                 latypes.push_back(lty);
             }
             if (!isTuple) {
-                structdecl->setBody(latypes);
+                if (is_vecelement_type(jt))
+                    // VecElement type is unwrapped in LLVM
+                    jst->struct_decl = latypes[0];
+                else
+                    structdecl->setBody(latypes);
             }
             else {
-                if (isvector && lasttype != T_int1 && !type_is_ghost(lasttype)) {
-                    // TODO: currently we get LLVM assertion failures for other vector sizes
-                    bool validVectorSize = (ntypes == 2 || ntypes == 4 || ntypes == 6);
-                    if (0 && lasttype->isSingleValueType() && !lasttype->isVectorTy() && validVectorSize) // currently disabled due to load/store alignment issues
+                if (isarray && lasttype != T_int1 && !type_is_ghost(lasttype)) {
+                    if (isvector && jl_special_vector_alignment(ntypes, jlasttype)!=0)
                         jst->struct_decl = VectorType::get(lasttype, ntypes);
                     else
                         jst->struct_decl = ArrayType::get(lasttype, ntypes);
@@ -397,6 +404,12 @@ static Type *julia_struct_to_llvm(jl_value_t *jt, bool *isboxed)
                     jst->struct_decl = StructType::get(jl_LLVMContext,ArrayRef<Type*>(&latypes[0],ntypes));
                 }
             }
+#ifndef NDEBUG
+            // If LLVM and Julia disagree about alignment, much trouble ensues, so check it!
+            unsigned llvm_alignment = jl_ExecutionEngine->getDataLayout().getABITypeAlignment((Type*)jst->struct_decl);
+            unsigned julia_alignment = jst->alignment;
+            assert(llvm_alignment==julia_alignment);
+#endif
         }
         return (Type*)jst->struct_decl;
     }
@@ -432,7 +445,7 @@ static bool is_tupletype_homogeneous(jl_svec_t *t)
 static bool deserves_sret(jl_value_t *dt, Type *T)
 {
     assert(jl_is_datatype(dt));
-    return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy();
+    return (size_t)jl_datatype_size(dt) > sizeof(void*) && !T->isFloatingPointTy() && !T->isVectorTy(); // floating-point and vector LLVM types never qualify
 }
 
 // --- generating various field accessors ---
@@ -750,10 +763,28 @@ static Value *emit_bounds_check(const jl_cgval_t &ainfo, jl_value_t *ty, Value *
 
 // --- loading and storing ---
 
+// Pick the alignment to attach to a load/store of a value of Julia type jltype.
+// If the given alignment is 0 and jltype's alignment exceeds MAX_ALIGN, return
+// MAX_ALIGN rather than jltype's alignment. Otherwise return the given alignment.
+//
+// Parameter ptr should be the pointer argument for the LoadInst or StoreInst.
+// It is currently unused, but might be used in the future for a more precise answer.
+static unsigned julia_alignment(Value* /*ptr*/, jl_value_t *jltype, unsigned alignment) {
+    if (!alignment && ((jl_datatype_t*)jltype)->alignment > MAX_ALIGN) {
+        // Type's natural alignment exceeds strictest alignment promised in heap, so return the heap alignment.
+        return MAX_ALIGN;
+    }
+    return alignment;
+}
+
+static LoadInst *build_load (Value *ptr, jl_value_t *jltype) {
+    return builder.CreateAlignedLoad(ptr, julia_alignment(ptr, jltype, 0)); // 0: caller requests no specific alignment
+}
+
 static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt);
 
 static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
-                             jl_codectx_t *ctx, MDNode *tbaa, size_t alignment = 0)
+                             jl_codectx_t *ctx, MDNode *tbaa, unsigned alignment = 0)
 {
     bool isboxed;
     Type *elty = julia_type_to_llvm(jltype, &isboxed);
@@ -778,9 +809,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
     //    elt = data;
     //}
     //else {
-        if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
-            alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
-        Instruction *load = builder.CreateAlignedLoad(data, alignment, false);
+        Instruction *load = builder.CreateAlignedLoad(data, julia_alignment(data, jltype, alignment), false);
         if (tbaa) {
             elt = tbaa_decorate(tbaa, load);
         }
@@ -799,7 +828,7 @@ static jl_cgval_t typed_load(Value *ptr, Value *idx_0based, jl_value_t *jltype,
 static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
                         jl_value_t *jltype, jl_codectx_t *ctx, MDNode *tbaa,
                         Value *parent,  // for the write barrier, NULL if no barrier needed
-                        size_t alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
+                        unsigned alignment = 0, bool root_box = true) // if the value to store needs a box, should we root it ?
 {
     Type *elty = julia_type_to_llvm(jltype);
     assert(elty != NULL);
@@ -821,9 +850,7 @@ static void typed_store(Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
         data = builder.CreateBitCast(ptr, PointerType::get(elty, 0));
     else
         data = ptr;
-    if (data->getType()->getContainedType(0)->isVectorTy() && !alignment)
-        alignment = ((jl_datatype_t*)jltype)->alignment; // prevent llvm from assuming 32 byte alignment of vectors
-    Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), alignment);
+    Instruction *store = builder.CreateAlignedStore(r, builder.CreateGEP(data, idx_0based), julia_alignment(r, jltype, alignment));
     if (tbaa)
         tbaa_decorate(tbaa, store);
 }
@@ -1026,27 +1053,34 @@ static jl_cgval_t emit_getfield_knownidx(const jl_cgval_t &strct, unsigned idx,
         }
         else {
             int align = jl_field_offset(jt,idx);
-            if (align & 1) align = 1;
-            else if (align & 2) align = 2;
-            else if (align & 4) align = 4;
-            else if (align & 8) align = 8;
-            else align = 16;
+            align |= 16;
+            align &= -align;
             return typed_load(addr, ConstantInt::get(T_size, 0), jfty, ctx, tbaa, align);
         }
     }
     else if (strct.ispointer) { // something stack allocated
-        Value *addr = builder.CreateConstInBoundsGEP2_32(
-            LLVM37_param(julia_type_to_llvm(strct.typ))
-            strct.V, 0, idx);
+        Value *addr;
+        if (is_vecelement_type((jl_value_t*)jt))
+            // VecElement types are unwrapped in LLVM.
+            addr = strct.V;
+        else
+            addr = builder.CreateConstInBoundsGEP2_32(
+                LLVM37_param(julia_type_to_llvm(strct.typ))
+                strct.V, 0, idx);
         assert(!jt->mutabl);
         jl_cgval_t fieldval = mark_julia_slot(addr, jfty);
         fieldval.isimmutable = strct.isimmutable;
         fieldval.gcroot = strct.gcroot;
         return fieldval;
     }
     else {
-        assert(strct.V->getType()->isVectorTy());
-        fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
+        if (strct.V->getType()->isVectorTy()) {
+            fldv = builder.CreateExtractElement(strct.V, ConstantInt::get(T_int32, idx));
+        } else {
+            // VecElement types are unwrapped in LLVM.
+            assert( strct.V->getType()->isSingleValueType() );
+            fldv = strct.V;
+        }
         if (jfty == (jl_value_t*)jl_bool_type) {
             fldv = builder.CreateTrunc(fldv, T_int1);
         }
@@ -1376,7 +1410,7 @@ static Value *boxed(const jl_cgval_t &vinfo, jl_codectx_t *ctx, bool gcrooted)
     assert(!type_is_ghost(t)); // should have been handled by isghost above!
 
     if (vinfo.ispointer)
-        v = builder.CreateLoad(builder.CreatePointerCast(v, t->getPointerTo()));
+        v = build_load( builder.CreatePointerCast(v, t->getPointerTo()), vinfo.typ );
 
     if (t == T_int1)
         return julia_bool(v);
@@ -1548,11 +1582,8 @@ static void emit_setfield(jl_datatype_t *sty, const jl_cgval_t &strct, size_t id
         }
         else {
             int align = jl_field_offset(sty, idx0);
-            if (align & 1) align = 1;
-            else if (align & 2) align = 2;
-            else if (align & 4) align = 4;
-            else if (align & 8) align = 8;
-            else align = 16;
+            align |= 16;
+            align &= -align;
             typed_store(addr, ConstantInt::get(T_size, 0), rhs, jfty, ctx, sty->mutabl ? tbaa_user : tbaa_immut, data_pointer(strct, ctx, T_pjlvalue), align);
         }
     }
@@ -1594,8 +1625,13 @@ static jl_cgval_t emit_new_struct(jl_value_t *ty, size_t nargs, jl_value_t **arg
                         fval = builder.CreateZExt(fval, T_int8);
                     if (lt->isVectorTy())
                         strct = builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32,idx));
-                    else
+                    else if (lt->isAggregateType())
                         strct = builder.CreateInsertValue(strct, fval, ArrayRef<unsigned>(&idx,1));
+                    else {
+                        // Must be a VecElement type, which comes unwrapped in LLVM.
+                        assert(is_vecelement_type(ty));
+                        strct = fval;
+                    }
                 }
                 idx++;
             }

diff --git a/src/codegen.cpp b/src/codegen.cpp
@@ -3936,7 +3936,7 @@ static Function *gen_jlcall_wrapper(jl_lambda_info_t *lam, Function *f, bool sre
         if (lty != NULL && !isboxed) {
             theArg = builder.CreatePointerCast(theArg, PointerType::get(lty,0));
             if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = builder.CreateLoad(theArg);
+                theArg = build_load(theArg, ty);
         }
         assert(dyn_cast<UndefValue>(theArg) == NULL);
         args[idx] = theArg;

diff --git a/src/init.c b/src/init.c
@@ -833,6 +833,7 @@ void jl_get_builtin_hooks(void)
     jl_ascii_string_type = (jl_datatype_t*)core("ASCIIString");
     jl_utf8_string_type = (jl_datatype_t*)core("UTF8String");
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
+    jl_vecelement_typename = ((jl_datatype_t*)core("VecElement"))->name;
 }
 
 JL_DLLEXPORT void jl_get_system_hooks(void)

diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
@@ -305,9 +305,13 @@ static Value *emit_unbox(Type *to, const jl_cgval_t &x, jl_value_t *jt)
     if (jt == (jl_value_t*)jl_bool_type)
         return builder.CreateZExt(builder.CreateTrunc(builder.CreateLoad(p), T_int1), to);
 
-    if (!x.isboxed) // stack has default alignment
+    if (x.isboxed)
+        return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
+    else if (jt)
+        return build_load(p, jt);
+    else
+        // stack has default alignment
         return builder.CreateLoad(p);
-    return builder.CreateAlignedLoad(p, 16); // julia's gc gives 16-byte aligned addresses
 }
 
 // unbox, trying to determine correct bitstype automatically

diff --git a/src/jltypes.c b/src/jltypes.c
@@ -35,6 +35,7 @@ jl_tupletype_t *jl_anytuple_type;
 jl_datatype_t *jl_anytuple_type_type;
 jl_datatype_t *jl_ntuple_type;
 jl_typename_t *jl_ntuple_typename;
+jl_typename_t *jl_vecelement_typename;
 jl_datatype_t *jl_vararg_type;
 jl_datatype_t *jl_tvar_type;
 jl_datatype_t *jl_uniontype_type;

diff --git a/src/julia.h b/src/julia.h
@@ -441,6 +441,7 @@ extern JL_DLLEXPORT jl_datatype_t *jl_slotnumber_type;
 extern JL_DLLEXPORT jl_datatype_t *jl_typedslot_type;
 extern JL_DLLEXPORT jl_datatype_t *jl_simplevector_type;
 extern JL_DLLEXPORT jl_typename_t *jl_tuple_typename;
+extern JL_DLLEXPORT jl_typename_t *jl_vecelement_typename;
 extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type;
 #define jl_tuple_type jl_anytuple_type
 extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type_type;
@@ -940,6 +941,12 @@ STATIC_INLINE int jl_is_tuple_type(void *t)
             ((jl_datatype_t*)(t))->name == jl_tuple_typename);
 }
 
+STATIC_INLINE int is_vecelement_type(jl_value_t* t)
+{
+    return (jl_is_datatype(t) && // only a datatype is cast and inspected below
+            ((jl_datatype_t*)(t))->name == jl_vecelement_typename); // typename of Core.VecElement
+}
+
 STATIC_INLINE int jl_is_vararg_type(jl_value_t *v)
 {
     return (jl_is_datatype(v) &&

diff --git a/src/julia_internal.h b/src/julia_internal.h
@@ -585,6 +585,7 @@ int sigs_eq(jl_value_t *a, jl_value_t *b, int useenv);
 
 jl_value_t *jl_lookup_match(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, jl_svec_t *tvars);
 
+unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
 #ifdef __cplusplus
 }

diff --git a/test/choosetests.jl b/test/choosetests.jl
@@ -19,7 +19,7 @@ function choosetests(choices = [])
         "char", "string", "triplequote", "unicode",
         "dates", "dict", "hashing", "remote", "iobuffer", "staged",
         "arrayops", "tuple", "subarray", "reduce", "reducedim", "random",
-        "abstractarray", "intfuncs", "simdloop", "blas", "sparse",
+        "abstractarray", "intfuncs", "simdloop", "vecelement", "blas", "sparse",
         "bitarray", "copy", "math", "fastmath", "functional",
         "operators", "path", "ccall", "parse", "loading", "bigint",
         "bigfloat", "sorting", "statistics", "spawn", "backtrace",
-Original file line number
+Diff line change
@@ Expand Up / @@ -585,6 +585,7 @@ int sigs_eq(jl_value_t *a, jl_value_t *b, int useenv); @@
     jl_value_t *jl_lookup_match(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, jl_svec_t *tvars);
+    unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
     #ifdef __cplusplus
     }
@@ Expand Down @@