Skip to content

Commit 15ea4a8

Browse files
committed Jul 17, 2013
Generate better code for intrinsics
Currently, our intrinsics are generated as functions that have the usual setup, which means an alloca, and therefore also a jump, for those intrinsics that return an immediate value. This is especially bad for unoptimized builds because it means that an intrinsic like "contains_managed" that should be just "ret 0" or "ret 1" actually ends up allocating stack space, doing a jump and a store/load sequence before it finally returns the value. To fix that, we need a way to stop the generic function declaration mechanism from allocating stack space for the return value. This implicitly also kills the jump, because the block for static allocas isn't required anymore. Additionally, trans_intrinsic needs to build the return itself instead of calling finish_fn, because the latter relies on the availability of the return value pointer. With these changes, we get the bare minimum code required for our intrinsics, which makes them small enough that inlining them makes the resulting code smaller, so we can mark them as "always inline" to get better-performing unoptimized builds. Optimized builds also benefit slightly from this change as there's less code for LLVM to translate and the smaller intrinsics help it to make better inlining decisions for a few code paths. Building stage2 librustc gets ~1% faster for the optimized version and 5% for the unoptimized version.
1 parent 3e57251 commit 15ea4a8

File tree

2 files changed

+61
-32
lines changed

2 files changed

+61
-32
lines changed
 

‎src/librustc/middle/trans/base.rs

+5-2
Original file line number | Diff line number | Diff line change
@@ -1610,6 +1610,7 @@ pub fn new_fn_ctxt_w_id(ccx: @mut CrateContext,
16101610
llfndecl: ValueRef,
16111611
id: ast::node_id,
16121612
output_type: ty::t,
1613+
skip_retptr: bool,
16131614
param_substs: Option<@param_substs>,
16141615
sp: Option<span>)
16151616
-> fn_ctxt {
@@ -1653,7 +1654,7 @@ pub fn new_fn_ctxt_w_id(ccx: @mut CrateContext,
16531654
fcx.llenv = unsafe {
16541655
llvm::LLVMGetParam(llfndecl, fcx.env_arg_pos() as c_uint)
16551656
};
1656-
if !ty::type_is_nil(substd_output_type) {
1657+
if !ty::type_is_nil(substd_output_type) && !(is_immediate && skip_retptr) {
16571658
fcx.llretptr = Some(make_return_pointer(fcx, substd_output_type));
16581659
}
16591660
fcx
@@ -1665,7 +1666,7 @@ pub fn new_fn_ctxt(ccx: @mut CrateContext,
16651666
output_type: ty::t,
16661667
sp: Option<span>)
16671668
-> fn_ctxt {
1668-
new_fn_ctxt_w_id(ccx, path, llfndecl, -1, output_type, None, sp)
1669+
new_fn_ctxt_w_id(ccx, path, llfndecl, -1, output_type, false, None, sp)
16691670
}
16701671

16711672
// NB: must keep 4 fns in sync:
@@ -1859,6 +1860,7 @@ pub fn trans_closure(ccx: @mut CrateContext,
18591860
llfndecl,
18601861
id,
18611862
output_type,
1863+
false,
18621864
param_substs,
18631865
Some(body.span));
18641866
let raw_llargs = create_llargs_for_fn_args(fcx, self_arg, decl.inputs);
@@ -2068,6 +2070,7 @@ pub fn trans_enum_variant_or_tuple_like_struct<A:IdAndTy>(
20682070
llfndecl,
20692071
ctor_id,
20702072
result_ty,
2073+
false,
20712074
param_substs,
20722075
None);
20732076

‎src/librustc/middle/trans/foreign.rs

+56-30
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111

1212
use back::{link, abi};
13-
use lib::llvm::{ValueRef};
13+
use lib::llvm::{Pointer, ValueRef};
1414
use lib;
1515
use middle::trans::base::*;
1616
use middle::trans::cabi;
@@ -558,7 +558,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
558558
args[i] = get_param(bcx.fcx.llfn, first_real_arg + i);
559559
}
560560
let llfn = bcx.ccx().intrinsics.get_copy(&name);
561-
Store(bcx, Call(bcx, llfn, args.slice(0, num_args)), bcx.fcx.llretptr.get());
561+
Ret(bcx, Call(bcx, llfn, args.slice(0, num_args)));
562562
}
563563

564564
fn memcpy_intrinsic(bcx: block, name: &'static str, tp_ty: ty::t, sizebits: u8) {
@@ -579,6 +579,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
579579
let volatile = C_i1(false);
580580
let llfn = bcx.ccx().intrinsics.get_copy(&name);
581581
Call(bcx, llfn, [dst_ptr, src_ptr, Mul(bcx, size, count), align, volatile]);
582+
RetVoid(bcx);
582583
}
583584

584585
fn memset_intrinsic(bcx: block, name: &'static str, tp_ty: ty::t, sizebits: u8) {
@@ -599,13 +600,14 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
599600
let volatile = C_i1(false);
600601
let llfn = bcx.ccx().intrinsics.get_copy(&name);
601602
Call(bcx, llfn, [dst_ptr, val, Mul(bcx, size, count), align, volatile]);
603+
RetVoid(bcx);
602604
}
603605

604606
fn count_zeros_intrinsic(bcx: block, name: &'static str) {
605607
let x = get_param(bcx.fcx.llfn, bcx.fcx.arg_pos(0u));
606608
let y = C_i1(false);
607609
let llfn = bcx.ccx().intrinsics.get_copy(&name);
608-
Store(bcx, Call(bcx, llfn, [x, y]), bcx.fcx.llretptr.get())
610+
Ret(bcx, Call(bcx, llfn, [x, y]));
609611
}
610612

611613
let output_type = ty::ty_fn_ret(ty::node_id_to_type(ccx.tcx, item.id));
@@ -615,16 +617,18 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
615617
decl,
616618
item.id,
617619
output_type,
620+
true,
618621
Some(substs),
619622
Some(item.span));
620623

624+
set_always_inline(fcx.llfn);
625+
621626
// Set the fixed stack segment flag if necessary.
622627
if attr::attrs_contains_name(attributes, "fixed_stack_segment") {
623628
set_fixed_stack_segment(fcx.llfn);
624629
}
625630

626631
let mut bcx = top_scope_block(fcx, None);
627-
let lltop = bcx.llbb;
628632
let first_real_arg = fcx.arg_pos(0u);
629633

630634
let nm = ccx.sess.str_of(item.ident);
@@ -653,17 +657,18 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
653657
get_param(decl, first_real_arg + 1u),
654658
get_param(decl, first_real_arg + 2u),
655659
order);
656-
Store(bcx, old, fcx.llretptr.get());
660+
Ret(bcx, old);
657661
}
658662
"load" => {
659663
let old = AtomicLoad(bcx, get_param(decl, first_real_arg),
660664
order);
661-
Store(bcx, old, fcx.llretptr.get());
665+
Ret(bcx, old);
662666
}
663667
"store" => {
664668
AtomicStore(bcx, get_param(decl, first_real_arg + 1u),
665669
get_param(decl, first_real_arg),
666670
order);
671+
RetVoid(bcx);
667672
}
668673
op => {
669674
// These are all AtomicRMW ops
@@ -685,21 +690,18 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
685690
let old = AtomicRMW(bcx, atom_op, get_param(decl, first_real_arg),
686691
get_param(decl, first_real_arg + 1u),
687692
order);
688-
Store(bcx, old, fcx.llretptr.get());
693+
Ret(bcx, old);
689694
}
690695
}
691696

692-
finish_fn(fcx, lltop, bcx);
693-
694697
return;
695698
}
696699

697700
match name {
698701
"size_of" => {
699702
let tp_ty = substs.tys[0];
700703
let lltp_ty = type_of::type_of(ccx, tp_ty);
701-
Store(bcx, C_uint(ccx, machine::llsize_of_real(ccx, lltp_ty)),
702-
fcx.llretptr.get());
704+
Ret(bcx, C_uint(ccx, machine::llsize_of_real(ccx, lltp_ty)));
703705
}
704706
"move_val" => {
705707
// Create a datum reflecting the value being moved.
@@ -713,6 +715,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
713715
ty: tp_ty, mode: mode};
714716
bcx = src.move_to(bcx, DROP_EXISTING,
715717
get_param(decl, first_real_arg));
718+
RetVoid(bcx);
716719
}
717720
"move_val_init" => {
718721
// See comments for `"move_val"`.
@@ -721,18 +724,17 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
721724
let src = Datum {val: get_param(decl, first_real_arg + 1u),
722725
ty: tp_ty, mode: mode};
723726
bcx = src.move_to(bcx, INIT, get_param(decl, first_real_arg));
727+
RetVoid(bcx);
724728
}
725729
"min_align_of" => {
726730
let tp_ty = substs.tys[0];
727731
let lltp_ty = type_of::type_of(ccx, tp_ty);
728-
Store(bcx, C_uint(ccx, machine::llalign_of_min(ccx, lltp_ty)),
729-
fcx.llretptr.get());
732+
Ret(bcx, C_uint(ccx, machine::llalign_of_min(ccx, lltp_ty)));
730733
}
731734
"pref_align_of"=> {
732735
let tp_ty = substs.tys[0];
733736
let lltp_ty = type_of::type_of(ccx, tp_ty);
734-
Store(bcx, C_uint(ccx, machine::llalign_of_pref(ccx, lltp_ty)),
735-
fcx.llretptr.get());
737+
Ret(bcx, C_uint(ccx, machine::llalign_of_pref(ccx, lltp_ty)));
736738
}
737739
"get_tydesc" => {
738740
let tp_ty = substs.tys[0];
@@ -745,19 +747,31 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
745747
// the llvm type of intrinsic::TyDesc struct.
746748
let userland_tydesc_ty = type_of::type_of(ccx, output_type);
747749
let td = PointerCast(bcx, static_ti.tydesc, userland_tydesc_ty);
748-
Store(bcx, td, fcx.llretptr.get());
750+
Ret(bcx, td);
749751
}
750752
"init" => {
751753
let tp_ty = substs.tys[0];
752754
let lltp_ty = type_of::type_of(ccx, tp_ty);
753-
if !ty::type_is_nil(tp_ty) {
754-
Store(bcx, C_null(lltp_ty), fcx.llretptr.get());
755+
match bcx.fcx.llretptr {
756+
Some(ptr) => { Store(bcx, C_null(lltp_ty), ptr); RetVoid(bcx); }
757+
None if ty::type_is_nil(tp_ty) => RetVoid(bcx),
758+
None => Ret(bcx, C_null(lltp_ty)),
755759
}
756760
}
757761
"uninit" => {
758762
// Do nothing, this is effectively a no-op
763+
let retty = substs.tys[0];
764+
if ty::type_is_immediate(ccx.tcx, retty) && !ty::type_is_nil(retty) {
765+
unsafe {
766+
Ret(bcx, lib::llvm::llvm::LLVMGetUndef(type_of(ccx, retty).to_ref()));
767+
}
768+
} else {
769+
RetVoid(bcx)
770+
}
771+
}
772+
"forget" => {
773+
RetVoid(bcx);
759774
}
760-
"forget" => {}
761775
"transmute" => {
762776
let (in_type, out_type) = (substs.tys[0], substs.tys[1]);
763777
let llintype = type_of::type_of(ccx, in_type);
@@ -784,34 +798,45 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
784798
}
785799

786800
if !ty::type_is_nil(out_type) {
787-
let lldestptr = fcx.llretptr.get();
788801
let llsrcval = get_param(decl, first_real_arg);
789802
if ty::type_is_immediate(ccx.tcx, in_type) {
790-
let lldestptr = PointerCast(bcx, lldestptr, llintype.ptr_to());
791-
Store(bcx, llsrcval, lldestptr);
803+
match fcx.llretptr {
804+
Some(llretptr) => {
805+
Store(bcx, llsrcval, PointerCast(bcx, llretptr, llintype.ptr_to()));
806+
RetVoid(bcx);
807+
}
808+
None => match (llintype.kind(), llouttype.kind()) {
809+
(Pointer, other) | (other, Pointer) if other != Pointer => {
810+
let tmp = Alloca(bcx, llouttype, "");
811+
Store(bcx, llsrcval, PointerCast(bcx, tmp, llintype.ptr_to()));
812+
Ret(bcx, Load(bcx, tmp));
813+
}
814+
_ => Ret(bcx, BitCast(bcx, llsrcval, llouttype))
815+
}
816+
}
792817
} else {
793818
// NB: Do not use a Load and Store here. This causes massive
794819
// code bloat when `transmute` is used on large structural
795820
// types.
821+
let lldestptr = fcx.llretptr.get();
796822
let lldestptr = PointerCast(bcx, lldestptr, Type::i8p());
797823
let llsrcptr = PointerCast(bcx, llsrcval, Type::i8p());
798824

799825
let llsize = llsize_of(ccx, llintype);
800826
call_memcpy(bcx, lldestptr, llsrcptr, llsize, 1);
827+
RetVoid(bcx);
801828
};
829+
} else {
830+
RetVoid(bcx);
802831
}
803832
}
804833
"needs_drop" => {
805834
let tp_ty = substs.tys[0];
806-
Store(bcx,
807-
C_bool(ty::type_needs_drop(ccx.tcx, tp_ty)),
808-
fcx.llretptr.get());
835+
Ret(bcx, C_bool(ty::type_needs_drop(ccx.tcx, tp_ty)));
809836
}
810837
"contains_managed" => {
811838
let tp_ty = substs.tys[0];
812-
Store(bcx,
813-
C_bool(ty::type_contents(ccx.tcx, tp_ty).contains_managed()),
814-
fcx.llretptr.get());
839+
Ret(bcx, C_bool(ty::type_contents(ccx.tcx, tp_ty).contains_managed()));
815840
}
816841
"visit_tydesc" => {
817842
let td = get_param(decl, first_real_arg);
@@ -821,6 +846,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
821846
let td = PointerCast(bcx, td, ccx.tydesc_type.ptr_to());
822847
glue::call_tydesc_glue_full(bcx, visitor, td,
823848
abi::tydesc_field_visit_glue, None);
849+
RetVoid(bcx);
824850
}
825851
"frame_address" => {
826852
let frameaddress = ccx.intrinsics.get_copy(& &"llvm.frameaddress");
@@ -847,6 +873,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
847873
bcx, None, fty, ty::mk_nil(),
848874
|bcx| Callee {bcx: bcx, data: Closure(datum)},
849875
ArgVals(arg_vals), Some(Ignore), DontAutorefArg).bcx;
876+
RetVoid(bcx);
850877
}
851878
"morestack_addr" => {
852879
// XXX This is a hack to grab the address of this particular
@@ -856,7 +883,7 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
856883
let morestack_addr = decl_cdecl_fn(
857884
bcx.ccx().llmod, "__morestack", llfty);
858885
let morestack_addr = PointerCast(bcx, morestack_addr, Type::nil().ptr_to());
859-
Store(bcx, morestack_addr, fcx.llretptr.get());
886+
Ret(bcx, morestack_addr);
860887
}
861888
"memcpy32" => memcpy_intrinsic(bcx, "llvm.memcpy.p0i8.p0i8.i32", substs.tys[0], 32),
862889
"memcpy64" => memcpy_intrinsic(bcx, "llvm.memcpy.p0i8.p0i8.i64", substs.tys[0], 64),
@@ -915,7 +942,6 @@ pub fn trans_intrinsic(ccx: @mut CrateContext,
915942
ccx.sess.span_bug(item.span, "unknown intrinsic");
916943
}
917944
}
918-
finish_fn(fcx, lltop, bcx);
919945
}
920946

921947
/**

0 commit comments

Comments
 (0)