Skip to content

Commit 306a99e

Browse files
committed
Auto merge of #26411 - dotdash:fat_in_registers, r=aatch
This has a number of advantages compared to creating a copy in memory and passing a pointer. The obvious one is that we don't have to put the data into memory but can keep it in registers. Since we're currently passing a pointer anyway (instead of using e.g. a known offset on the stack, which is what the `byval` attribute would achieve), we only use a single additional register for each fat pointer, but save at least two pointers' worth of stack in exchange (sometimes more, because more than one copy gets eliminated). On architectures that pass arguments on the stack, we save a pointer's worth of stack even without considering the omitted copies.

Additionally, LLVM can optimize the code a lot better, to a large degree due to the fact that lots of copies are gone or can be optimized away. Additionally, we can now emit attributes like `nonnull` on the data and/or vtable pointers contained in the fat pointer, potentially allowing for even more optimizations.

This results in LLVM passes being about 3-7% faster (depending on the crate), and the resulting code is also a few percent smaller, for example:

|text|data|filename|
|----|----|--------|
|5671479|3941461|before/librustc-d8ace771.so|
|5447663|3905745|after/librustc-d8ace771.so|
| | | |
|1944425|2394024|before/libstd-d8ace771.so|
|1896769|2387610|after/libstd-d8ace771.so|

I had to remove a call in the backtrace-debuginfo test, because LLVM can now merge the tails of some blocks when optimizations are turned on, which can't correctly preserve line info.

Fixes #22924

Cc #22891 (at least for fat pointers the code is good now)
2 parents 40d19bf + f777562 commit 306a99e

17 files changed

+322
-256
lines changed

src/librustc_llvm/lib.rs

+12
Original file line numberDiff line numberDiff line change
@@ -2252,6 +2252,18 @@ pub fn get_param(llfn: ValueRef, index: c_uint) -> ValueRef {
22522252
}
22532253
}
22542254

2255+
/// Collects all parameters of the LLVM function `llfn` into a `Vec`, in
/// ascending parameter-index order (index 0 first).
///
/// The number of entries equals whatever `LLVMCountParams(llfn)` reports; a
/// function with no parameters yields an empty vector.
// NOTE(review): `llfn` is handed straight to the LLVM FFI without any check
// here — presumably it must be a valid function value; confirm against callers.
pub fn get_params(llfn: ValueRef) -> Vec<ValueRef> {
2256+
// The LLVM* calls below are FFI functions, hence the `unsafe` block.
unsafe {
2257+
// Query the parameter count once so the vector can be sized up front.
let num_params = LLVMCountParams(llfn);
2258+
let mut params = Vec::with_capacity(num_params as usize);
2259+
// Fetch each parameter by its index and append it, preserving order.
for idx in 0..num_params {
2260+
params.push(LLVMGetParam(llfn, idx));
2261+
}
2262+
2263+
params
2264+
}
2265+
}
2266+
22552267
#[allow(missing_copy_implementations)]
22562268
pub enum RustString_opaque {}
22572269
pub type RustStringRef = *mut RustString_opaque;

src/librustc_trans/trans/_match.rs

+5-13
Original file line numberDiff line numberDiff line change
@@ -828,19 +828,11 @@ fn compare_values<'blk, 'tcx>(cx: Block<'blk, 'tcx>,
828828
None,
829829
&format!("comparison of `{}`", rhs_t),
830830
StrEqFnLangItem);
831-
let t = ty::mk_str_slice(cx.tcx(), cx.tcx().mk_region(ty::ReStatic), ast::MutImmutable);
832-
// The comparison function gets the slices by value, so we have to make copies here. Even
833-
// if the function doesn't write through the pointer, things like lifetime intrinsics
834-
// require that we do this properly
835-
let lhs_arg = alloc_ty(cx, t, "lhs");
836-
let rhs_arg = alloc_ty(cx, t, "rhs");
837-
memcpy_ty(cx, lhs_arg, lhs, t);
838-
memcpy_ty(cx, rhs_arg, rhs, t);
839-
let res = callee::trans_lang_call(cx, did, &[lhs_arg, rhs_arg], None, debug_loc);
840-
call_lifetime_end(res.bcx, lhs_arg);
841-
call_lifetime_end(res.bcx, rhs_arg);
842-
843-
res
831+
let lhs_data = Load(cx, expr::get_dataptr(cx, lhs));
832+
let lhs_len = Load(cx, expr::get_len(cx, lhs));
833+
let rhs_data = Load(cx, expr::get_dataptr(cx, rhs));
834+
let rhs_len = Load(cx, expr::get_len(cx, rhs));
835+
callee::trans_lang_call(cx, did, &[lhs_data, lhs_len, rhs_data, rhs_len], None, debug_loc)
844836
}
845837

846838
let _icx = push_ctxt("compare_values");

src/librustc_trans/trans/asm.rs

+12-13
Original file line numberDiff line numberDiff line change
@@ -45,32 +45,31 @@ pub fn trans_inline_asm<'blk, 'tcx>(bcx: Block<'blk, 'tcx>, ia: &ast::InlineAsm)
4545
output_types.push(type_of::type_of(bcx.ccx(), out_datum.ty));
4646
let val = out_datum.val;
4747
if is_rw {
48-
ext_inputs.push(unpack_result!(bcx, {
49-
callee::trans_arg_datum(bcx,
50-
expr_ty(bcx, &**out),
51-
out_datum,
52-
cleanup::CustomScope(temp_scope),
53-
callee::DontAutorefArg)
54-
}));
48+
bcx = callee::trans_arg_datum(bcx,
49+
expr_ty(bcx, &**out),
50+
out_datum,
51+
cleanup::CustomScope(temp_scope),
52+
callee::DontAutorefArg,
53+
&mut ext_inputs);
5554
ext_constraints.push(i.to_string());
5655
}
5756
val
5857

5958
}).collect::<Vec<_>>();
6059

6160
// Now the input operands
62-
let mut inputs = ia.inputs.iter().map(|&(ref c, ref input)| {
61+
let mut inputs = Vec::new();
62+
for &(ref c, ref input) in &ia.inputs {
6363
constraints.push((*c).clone());
6464

6565
let in_datum = unpack_datum!(bcx, expr::trans(bcx, &**input));
66-
unpack_result!(bcx, {
67-
callee::trans_arg_datum(bcx,
66+
bcx = callee::trans_arg_datum(bcx,
6867
expr_ty(bcx, &**input),
6968
in_datum,
7069
cleanup::CustomScope(temp_scope),
71-
callee::DontAutorefArg)
72-
})
73-
}).collect::<Vec<_>>();
70+
callee::DontAutorefArg,
71+
&mut inputs);
72+
}
7473
inputs.push_all(&ext_inputs[..]);
7574

7675
// no failure occurred preparing operands, no need to cleanup

src/librustc_trans/trans/attributes.rs

+47-34
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
188188
};
189189

190190
// Index 0 is the return value of the llvm func, so we start at 1
191-
let mut first_arg_offset = 1;
191+
let mut idx = 1;
192192
if let ty::FnConverging(ret_ty) = ret_ty {
193193
// A function pointer is called without the declaration
194194
// available, so we have to apply any attributes with ABI
@@ -206,7 +206,7 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
206206
.arg(1, llvm::DereferenceableAttribute(llret_sz));
207207

208208
// Add one more since there's an outptr
209-
first_arg_offset += 1;
209+
idx += 1;
210210
} else {
211211
// The `noalias` attribute on the return value is useful to a
212212
// function ptr caller.
@@ -236,10 +236,9 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
236236
}
237237
}
238238

239-
for (idx, &t) in input_tys.iter().enumerate().map(|(i, v)| (i + first_arg_offset, v)) {
239+
for &t in input_tys.iter() {
240240
match t.sty {
241-
// this needs to be first to prevent fat pointers from falling through
242-
_ if !common::type_is_immediate(ccx, t) => {
241+
_ if type_of::arg_is_indirect(ccx, t) => {
243242
let llarg_sz = machine::llsize_of_real(ccx, type_of::type_of(ccx, t));
244243

245244
// For non-immediate arguments the callee gets its own copy of
@@ -256,49 +255,63 @@ pub fn from_fn_type<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx>, fn_type: ty::Ty<'tcx
256255

257256
// `Box` pointer parameters never alias because ownership is transferred
258257
ty::TyBox(inner) => {
259-
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, inner));
260-
261-
attrs.arg(idx, llvm::Attribute::NoAlias)
262-
.arg(idx, llvm::DereferenceableAttribute(llsz));
258+
attrs.arg(idx, llvm::Attribute::NoAlias);
259+
260+
if common::type_is_sized(ccx.tcx(), inner) {
261+
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, inner));
262+
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
263+
} else {
264+
attrs.arg(idx, llvm::NonNullAttribute);
265+
if ty::type_is_trait(inner) {
266+
attrs.arg(idx + 1, llvm::NonNullAttribute);
267+
}
268+
}
263269
}
264270

265-
// `&mut` pointer parameters never alias other parameters, or mutable global data
266-
//
267-
// `&T` where `T` contains no `UnsafeCell<U>` is immutable, and can be marked as both
268-
// `readonly` and `noalias`, as LLVM's definition of `noalias` is based solely on
269-
// memory dependencies rather than pointer equality
270-
ty::TyRef(b, mt) if mt.mutbl == ast::MutMutable ||
271-
!ty::type_contents(ccx.tcx(), mt.ty).interior_unsafe() => {
271+
ty::TyRef(b, mt) => {
272+
// `&mut` pointer parameters never alias other parameters, or mutable global data
273+
//
274+
// `&T` where `T` contains no `UnsafeCell<U>` is immutable, and can be marked as
275+
// both `readonly` and `noalias`, as LLVM's definition of `noalias` is based solely
276+
// on memory dependencies rather than pointer equality
277+
let interior_unsafe = ty::type_contents(ccx.tcx(), mt.ty).interior_unsafe();
272278

273-
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
274-
attrs.arg(idx, llvm::Attribute::NoAlias)
275-
.arg(idx, llvm::DereferenceableAttribute(llsz));
279+
if mt.mutbl == ast::MutMutable || !interior_unsafe {
280+
attrs.arg(idx, llvm::Attribute::NoAlias);
281+
}
276282

277-
if mt.mutbl == ast::MutImmutable {
283+
if mt.mutbl == ast::MutImmutable && !interior_unsafe {
278284
attrs.arg(idx, llvm::Attribute::ReadOnly);
279285
}
280286

287+
// & pointer parameters are also never null and for sized types we also know
288+
// exactly how many bytes we can dereference
289+
if common::type_is_sized(ccx.tcx(), mt.ty) {
290+
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
291+
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
292+
} else {
293+
attrs.arg(idx, llvm::NonNullAttribute);
294+
if ty::type_is_trait(mt.ty) {
295+
attrs.arg(idx + 1, llvm::NonNullAttribute);
296+
}
297+
}
298+
299+
// When a reference in an argument has no named lifetime, it's
300+
// impossible for that reference to escape this function
301+
// (returned or stored beyond the call by a closure).
281302
if let ReLateBound(_, BrAnon(_)) = *b {
282303
attrs.arg(idx, llvm::Attribute::NoCapture);
283304
}
284305
}
285306

286-
// When a reference in an argument has no named lifetime, it's impossible for that
287-
// reference to escape this function (returned or stored beyond the call by a closure).
288-
ty::TyRef(&ReLateBound(_, BrAnon(_)), mt) => {
289-
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
290-
attrs.arg(idx, llvm::Attribute::NoCapture)
291-
.arg(idx, llvm::DereferenceableAttribute(llsz));
292-
}
293-
294-
// & pointer parameters are also never null and we know exactly how
295-
// many bytes we can dereference
296-
ty::TyRef(_, mt) => {
297-
let llsz = machine::llsize_of_real(ccx, type_of::type_of(ccx, mt.ty));
298-
attrs.arg(idx, llvm::DereferenceableAttribute(llsz));
299-
}
300307
_ => ()
301308
}
309+
310+
if common::type_is_fat_ptr(ccx.tcx(), t) {
311+
idx += 2;
312+
} else {
313+
idx += 1;
314+
}
302315
}
303316

304317
attrs

src/librustc_trans/trans/base.rs

+46-17
Original file line numberDiff line numberDiff line change
@@ -1297,16 +1297,28 @@ pub type RvalueDatum<'tcx> = datum::Datum<'tcx, datum::Rvalue>;
12971297
// create_datums_for_fn_args: creates rvalue datums for each of the
12981298
// incoming function arguments. These will later be stored into
12991299
// appropriate lvalue datums.
1300-
pub fn create_datums_for_fn_args<'a, 'tcx>(fcx: &FunctionContext<'a, 'tcx>,
1300+
pub fn create_datums_for_fn_args<'a, 'tcx>(bcx: Block<'a, 'tcx>,
13011301
arg_tys: &[Ty<'tcx>])
13021302
-> Vec<RvalueDatum<'tcx>> {
13031303
let _icx = push_ctxt("create_datums_for_fn_args");
1304+
let fcx = bcx.fcx;
13041305

13051306
// Return an array wrapping the ValueRefs that we get from `get_param` for
13061307
// each argument into datums.
1307-
arg_tys.iter().enumerate().map(|(i, &arg_ty)| {
1308-
let llarg = get_param(fcx.llfn, fcx.arg_pos(i) as c_uint);
1309-
datum::Datum::new(llarg, arg_ty, arg_kind(fcx, arg_ty))
1308+
let mut i = fcx.arg_offset() as c_uint;
1309+
arg_tys.iter().map(|&arg_ty| {
1310+
if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
1311+
let llty = type_of::type_of(bcx.ccx(), arg_ty);
1312+
let data = get_param(fcx.llfn, i);
1313+
let extra = get_param(fcx.llfn, i + 1);
1314+
let fat_ptr = expr::make_fat_ptr(bcx, llty, data, extra);
1315+
i += 2;
1316+
datum::Datum::new(fat_ptr, arg_ty, datum::Rvalue { mode: datum::ByValue })
1317+
} else {
1318+
let llarg = get_param(fcx.llfn, i);
1319+
i += 1;
1320+
datum::Datum::new(llarg, arg_ty, arg_kind(fcx, arg_ty))
1321+
}
13101322
}).collect()
13111323
}
13121324

@@ -1321,12 +1333,23 @@ fn create_datums_for_fn_args_under_call_abi<'blk, 'tcx>(
13211333
arg_tys: &[Ty<'tcx>])
13221334
-> Vec<RvalueDatum<'tcx>> {
13231335
let mut result = Vec::new();
1336+
let mut idx = bcx.fcx.arg_offset() as c_uint;
13241337
for (i, &arg_ty) in arg_tys.iter().enumerate() {
13251338
if i < arg_tys.len() - 1 {
13261339
// Regular argument.
1327-
let llarg = get_param(bcx.fcx.llfn, bcx.fcx.arg_pos(i) as c_uint);
1328-
result.push(datum::Datum::new(llarg, arg_ty, arg_kind(bcx.fcx,
1329-
arg_ty)));
1340+
result.push(if common::type_is_fat_ptr(bcx.tcx(), arg_ty) {
1341+
let llty = type_of::type_of(bcx.ccx(), arg_ty);
1342+
let data = get_param(bcx.fcx.llfn, idx);
1343+
let extra = get_param(bcx.fcx.llfn, idx + 1);
1344+
idx += 2;
1345+
let fat_ptr = expr::make_fat_ptr(bcx, llty, data, extra);
1346+
datum::Datum::new(fat_ptr, arg_ty, datum::Rvalue { mode: datum::ByValue })
1347+
} else {
1348+
let val = get_param(bcx.fcx.llfn, idx);
1349+
idx += 1;
1350+
datum::Datum::new(val, arg_ty, arg_kind(bcx.fcx, arg_ty))
1351+
});
1352+
13301353
continue
13311354
}
13321355

@@ -1346,15 +1369,21 @@ fn create_datums_for_fn_args_under_call_abi<'blk, 'tcx>(
13461369
llval| {
13471370
for (j, &tupled_arg_ty) in
13481371
tupled_arg_tys.iter().enumerate() {
1349-
let llarg =
1350-
get_param(bcx.fcx.llfn,
1351-
bcx.fcx.arg_pos(i + j) as c_uint);
13521372
let lldest = GEPi(bcx, llval, &[0, j]);
1353-
let datum = datum::Datum::new(
1354-
llarg,
1355-
tupled_arg_ty,
1356-
arg_kind(bcx.fcx, tupled_arg_ty));
1357-
bcx = datum.store_to(bcx, lldest);
1373+
if common::type_is_fat_ptr(bcx.tcx(), tupled_arg_ty) {
1374+
let data = get_param(bcx.fcx.llfn, idx);
1375+
let extra = get_param(bcx.fcx.llfn, idx + 1);
1376+
Store(bcx, data, expr::get_dataptr(bcx, lldest));
1377+
Store(bcx, extra, expr::get_len(bcx, lldest));
1378+
idx += 2;
1379+
} else {
1380+
let datum = datum::Datum::new(
1381+
get_param(bcx.fcx.llfn, idx),
1382+
tupled_arg_ty,
1383+
arg_kind(bcx.fcx, tupled_arg_ty));
1384+
idx += 1;
1385+
bcx = datum.store_to(bcx, lldest);
1386+
};
13581387
}
13591388
bcx
13601389
}));
@@ -1566,7 +1595,7 @@ pub fn trans_closure<'a, 'b, 'tcx>(ccx: &CrateContext<'a, 'tcx>,
15661595
}
15671596
_ => {
15681597
let arg_tys = untuple_arguments_if_necessary(ccx, &monomorphized_arg_types, abi);
1569-
create_datums_for_fn_args(&fcx, &arg_tys)
1598+
create_datums_for_fn_args(bcx, &arg_tys)
15701599
}
15711600
};
15721601

@@ -1773,7 +1802,7 @@ fn trans_enum_variant_or_tuple_like_struct<'a, 'tcx>(ccx: &CrateContext<'a, 'tcx
17731802
ty::erase_late_bound_regions(
17741803
ccx.tcx(), &ty::ty_fn_args(ctor_ty));
17751804

1776-
let arg_datums = create_datums_for_fn_args(&fcx, &arg_tys[..]);
1805+
let arg_datums = create_datums_for_fn_args(bcx, &arg_tys[..]);
17771806

17781807
if !type_is_zero_size(fcx.ccx, result_ty.unwrap()) {
17791808
let dest = fcx.get_ret_slot(bcx, result_ty, "eret_slot");

0 commit comments

Comments
 (0)