Skip to content

Commit

Permalink
Translate SIMD construction as insertelements and a single store.
Browse files Browse the repository at this point in the history
This almost completely avoids GEPi's and pointer manipulation,
postponing it until the end with one big write of the whole vector. This
leads to a small speed-up in compilation, and makes it easier for LLVM
to work with the values, e.g. with `--opt-level=0`,

    pub fn foo() -> f32x4 {
        f32x4(0.,0.,0.,0.)
    }

was previously compiled to

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      %sret_slot = alloca <4 x float>
      %0 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 0
      store float 0.000000e+00, float* %0
      %1 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 1
      store float 0.000000e+00, float* %1
      %2 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 2
      store float 0.000000e+00, float* %2
      %3 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 3
      store float 0.000000e+00, float* %3
      %4 = load <4 x float>* %sret_slot
      ret <4 x float> %4
    }

but now becomes

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      ret <4 x float> zeroinitializer
    }
  • Loading branch information
huonw committed Nov 4, 2014
1 parent ff50f24 commit 071c411
Showing 1 changed file with 29 additions and 8 deletions.
37 changes: 29 additions & 8 deletions src/librustc/middle/trans/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
None => {}
};

// Now, we just overwrite the fields we've explicitly specified
for &(i, ref e) in fields.iter() {
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
let e_ty = expr_ty_adjusted(bcx, &**e);
bcx = trans_into(bcx, &**e, SaveIn(dest));
let scope = cleanup::CustomScope(custom_cleanup_scope);
fcx.schedule_lifetime_end(scope, dest);
fcx.schedule_drop_mem(scope, dest, e_ty);
if ty::type_is_simd(bcx.tcx(), ty) {
// This is the constructor of a SIMD type, such types are
// always primitive machine types and so do not have a
// destructor or require any clean-up.
let llty = type_of::type_of(bcx.ccx(), ty);

// keep a vector as a register, and running through the field
// `insertelement`ing them directly into that register
// (i.e. avoid GEPi and `store`s to an alloca) .
let mut vec_val = C_undef(llty);

for &(i, ref e) in fields.iter() {
let block_datum = trans(bcx, &**e);
bcx = block_datum.bcx;
let position = C_uint(bcx.ccx(), i);
let value = block_datum.datum.to_llscalarish(bcx);
vec_val = InsertElement(bcx, vec_val, value, position);
}
Store(bcx, vec_val, addr);
} else {
// Now, we just overwrite the fields we've explicitly specified
for &(i, ref e) in fields.iter() {
let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
let e_ty = expr_ty_adjusted(bcx, &**e);
bcx = trans_into(bcx, &**e, SaveIn(dest));
let scope = cleanup::CustomScope(custom_cleanup_scope);
fcx.schedule_lifetime_end(scope, dest);
fcx.schedule_drop_mem(scope, dest, e_ty);
}
}

adt::trans_set_discr(bcx, &*repr, addr, discr);
Expand Down

0 comments on commit 071c411

Please sign in to comment.