From 071c4110457358f5b91287a63fb782201f5eb7ad Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Tue, 4 Nov 2014 20:59:00 +1100 Subject: [PATCH] Translate SIMD construction as `insertelement`s and a single store. This almost completely avoids GEPi's and pointer manipulation, postponing it until the end with one big write of the whole vector. This leads to a small speed-up in compilation, and makes it easier for LLVM to work with the values, e.g. with `--opt-level=0`, pub fn foo() -> f32x4 { f32x4(0.,0.,0.,0.) } was previously compiled to define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 { entry-block: %sret_slot = alloca <4 x float> %0 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 0 store float 0.000000e+00, float* %0 %1 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 1 store float 0.000000e+00, float* %1 %2 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 2 store float 0.000000e+00, float* %2 %3 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 3 store float 0.000000e+00, float* %3 %4 = load <4 x float>* %sret_slot ret <4 x float> %4 } but now becomes define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 { entry-block: ret <4 x float> zeroinitializer } --- src/librustc/middle/trans/expr.rs | 37 ++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/src/librustc/middle/trans/expr.rs b/src/librustc/middle/trans/expr.rs index f516c6106a991..dca6e10f04de3 100644 --- a/src/librustc/middle/trans/expr.rs +++ b/src/librustc/middle/trans/expr.rs @@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>, None => {} }; - // Now, we just overwrite the fields we've explicitly specified - for &(i, ref e) in fields.iter() { - let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i); - let e_ty = expr_ty_adjusted(bcx, &**e); - bcx = trans_into(bcx, &**e, SaveIn(dest)); - let scope = cleanup::CustomScope(custom_cleanup_scope); - 
fcx.schedule_lifetime_end(scope, dest); - fcx.schedule_drop_mem(scope, dest, e_ty); + if ty::type_is_simd(bcx.tcx(), ty) { + // This is the constructor of a SIMD type; such types are + // always primitive machine types and so do not have a + // destructor or require any clean-up. + let llty = type_of::type_of(bcx.ccx(), ty); + + // Keep the vector in a register, running through the fields and + // `insertelement`ing them directly into that register + // (i.e. avoid GEPi and `store`s to an alloca). + let mut vec_val = C_undef(llty); + + for &(i, ref e) in fields.iter() { + let block_datum = trans(bcx, &**e); + bcx = block_datum.bcx; + let position = C_uint(bcx.ccx(), i); + let value = block_datum.datum.to_llscalarish(bcx); + vec_val = InsertElement(bcx, vec_val, value, position); + } + Store(bcx, vec_val, addr); + } else { + // Now, we just overwrite the fields we've explicitly specified + for &(i, ref e) in fields.iter() { + let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i); + let e_ty = expr_ty_adjusted(bcx, &**e); + bcx = trans_into(bcx, &**e, SaveIn(dest)); + let scope = cleanup::CustomScope(custom_cleanup_scope); + fcx.schedule_lifetime_end(scope, dest); + fcx.schedule_drop_mem(scope, dest, e_ty); + } } adt::trans_set_discr(bcx, &*repr, addr, discr);