Translate SIMD construction as insertelements and a single store.

This almost completely avoids GEPis and pointer manipulation, deferring all memory access to the end, where the whole vector is written with one big store. This leads to a small speed-up in compilation and makes it easier for LLVM to work with the values. For example, with `--opt-level=0`,

    pub fn foo() -> f32x4 {
        f32x4(0.,0.,0.,0.)
    }

was previously compiled to

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      %sret_slot = alloca <4 x float>
      %0 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 0
      store float 0.000000e+00, float* %0
      %1 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 1
      store float 0.000000e+00, float* %1
      %2 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 2
      store float 0.000000e+00, float* %2
      %3 = getelementptr inbounds <4 x float>* %sret_slot, i32 0, i32 3
      store float 0.000000e+00, float* %3
      %4 = load <4 x float>* %sret_slot
      ret <4 x float> %4
    }

but now becomes

    define <4 x float> @_ZN3foo20h74913e8b13d89666eaaE() unnamed_addr #0 {
    entry-block:
      ret <4 x float> zeroinitializer
    }
rust-lang · Nov 4, 2014 · 071c411 · 071c411
1 parent ff50f24
commit 071c411
Showing 1 changed file with 29 additions and 8 deletions.
diff --git a/src/librustc/middle/trans/expr.rs b/src/librustc/middle/trans/expr.rs
@@ -1455,14 +1455,35 @@ pub fn trans_adt<'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
         None => {}
     };
 
-    // Now, we just overwrite the fields we've explicitly specified
-    for &(i, ref e) in fields.iter() {
-        let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
-        let e_ty = expr_ty_adjusted(bcx, &**e);
-        bcx = trans_into(bcx, &**e, SaveIn(dest));
-        let scope = cleanup::CustomScope(custom_cleanup_scope);
-        fcx.schedule_lifetime_end(scope, dest);
-        fcx.schedule_drop_mem(scope, dest, e_ty);
+    if ty::type_is_simd(bcx.tcx(), ty) {
+        // This is the constructor of a SIMD type, such types are
+        // always primitive machine types and so do not have a
+        // destructor or require any clean-up.
+        let llty = type_of::type_of(bcx.ccx(), ty);
+
+        // Keep the vector in a register and run through the fields,
+        // `insertelement`ing each one directly into that register
+        // (i.e. avoid GEPi and `store`s to an alloca).
+        let mut vec_val = C_undef(llty);
+
+        for &(i, ref e) in fields.iter() {
+            let block_datum = trans(bcx, &**e);
+            bcx = block_datum.bcx;
+            let position = C_uint(bcx.ccx(), i);
+            let value = block_datum.datum.to_llscalarish(bcx);
+            vec_val = InsertElement(bcx, vec_val, value, position);
+        }
+        Store(bcx, vec_val, addr);
+    } else {
+        // Now, we just overwrite the fields we've explicitly specified
+        for &(i, ref e) in fields.iter() {
+            let dest = adt::trans_field_ptr(bcx, &*repr, addr, discr, i);
+            let e_ty = expr_ty_adjusted(bcx, &**e);
+            bcx = trans_into(bcx, &**e, SaveIn(dest));
+            let scope = cleanup::CustomScope(custom_cleanup_scope);
+            fcx.schedule_lifetime_end(scope, dest);
+            fcx.schedule_drop_mem(scope, dest, e_ty);
+        }
     }
 
     adt::trans_set_discr(bcx, &*repr, addr, discr);