Skip to content

Commit eecde58

Browse files
committed
Auto merge of rust-lang#103172 - pcwalton:deduced-param-attrs, r=oli-obk
Introduce deduced parameter attributes, and use them for deducing `readonly` on indirect immutable freeze by-value function parameters. Introduce deduced parameter attributes, and use them for deducing `readonly` on indirect immutable freeze by-value function parameters. Right now, `rustc` only examines function signatures and the platform ABI when determining the LLVM attributes to apply to parameters. This results in missed optimizations, because there are some attributes that can be determined via analysis of the MIR making up the function body. In particular, `readonly` could be applied to most indirectly-passed by-value function arguments (specifically, those that are freeze and are observed not to be mutated), but it currently is not. This patch introduces the machinery that allows `rustc` to determine those attributes. It consists of a query, `deduced_param_attrs`, that, when evaluated, analyzes the MIR of the function to determine supplementary attributes. The results of this query for each function are written into the crate metadata so that the deduced parameter attributes can be applied to cross-crate functions. In this patch, we simply check the parameter for mutations to determine whether the `readonly` attribute should be applied to parameters that are indirect immutable freeze by-value. More attributes could conceivably be deduced in the future: `nocapture` and `noalias` come to mind. Adding `readonly` to indirect function parameters where applicable enables some potential optimizations in LLVM that are discussed in [issue 103103] and [PR 103070] around avoiding stack-to-stack memory copies that appear in functions like `core::fmt::Write::write_fmt` and `core::panicking::assert_failed`. These functions pass a large structure unchanged by value to a subfunction that also doesn't mutate it. Since the structure in this case is passed as an indirect parameter, it's a pointer from LLVM's perspective. As a result, the intermediate copy of the structure that our codegen emits could be optimized away by LLVM's MemCpyOptimizer if it knew that the pointer is `readonly nocapture noalias` in both the caller and callee. We already pass `nocapture noalias`, but we're missing `readonly`, as we can't determine whether a by-value parameter is mutated by examining the signature in Rust. I didn't have much success with having LLVM infer the `readonly` attribute, even with fat LTO; it seems that deducing it at the MIR level is necessary. No large benefits should be expected from this optimization *now*; LLVM needs some changes (discussed in [PR 103070]) to more aggressively use the `noalias nocapture readonly` combination in its alias analysis. I have some LLVM patches for these optimizations and have had them looked over. With all the patches applied locally, I enabled LLVM to remove all the `memcpy`s from the following code: ```rust fn main() { println!("Hello {}", 3); } ``` which is a significant codegen improvement over the status quo. I expect that if this optimization kicks in in multiple places even for such a simple program, then it will apply to Rust code all over the place. [issue 103103]: rust-lang#103103 [PR 103070]: rust-lang#103070
2 parents 8f2c56a + da630ac commit eecde58

File tree

14 files changed

+393
-9
lines changed

14 files changed

+393
-9
lines changed

compiler/rustc_metadata/src/rmeta/decoder/cstore_impl.rs

+1
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,7 @@ provide! { tcx, def_id, other, cdata,
224224
fn_arg_names => { table }
225225
generator_kind => { table }
226226
trait_def => { table }
227+
deduced_param_attrs => { table }
227228
collect_trait_impl_trait_tys => {
228229
Ok(cdata
229230
.root

compiler/rustc_metadata/src/rmeta/encoder.rs

+16-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use rustc_middle::ty::query::Providers;
3131
use rustc_middle::ty::{self, SymbolName, Ty, TyCtxt};
3232
use rustc_middle::util::common::to_readable_str;
3333
use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder};
34-
use rustc_session::config::CrateType;
34+
use rustc_session::config::{CrateType, OptLevel};
3535
use rustc_session::cstore::{ForeignModule, LinkagePreference, NativeLib};
3636
use rustc_span::hygiene::{ExpnIndex, HygieneEncodeContext, MacroKind};
3737
use rustc_span::symbol::{sym, Symbol};
@@ -1478,6 +1478,21 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
14781478
record!(self.tables.unused_generic_params[def_id.to_def_id()] <- unused);
14791479
}
14801480
}
1481+
1482+
// Encode all the deduced parameter attributes for everything that has MIR, even for items
1483+
// that can't be inlined. But don't if we aren't optimizing in non-incremental mode, to
1484+
// save the query traffic.
1485+
if tcx.sess.opts.output_types.should_codegen()
1486+
&& tcx.sess.opts.optimize != OptLevel::No
1487+
&& tcx.sess.opts.incremental.is_none()
1488+
{
1489+
for &local_def_id in tcx.mir_keys(()) {
1490+
if let DefKind::AssocFn | DefKind::Fn = tcx.def_kind(local_def_id) {
1491+
record_array!(self.tables.deduced_param_attrs[local_def_id.to_def_id()] <-
1492+
self.tcx.deduced_param_attrs(local_def_id.to_def_id()));
1493+
}
1494+
}
1495+
}
14811496
}
14821497

14831498
fn encode_stability(&mut self, def_id: DefId) {

compiler/rustc_metadata/src/rmeta/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use rustc_middle::mir;
2323
use rustc_middle::ty::fast_reject::SimplifiedType;
2424
use rustc_middle::ty::query::Providers;
2525
use rustc_middle::ty::{self, ReprOptions, Ty};
26-
use rustc_middle::ty::{GeneratorDiagnosticData, ParameterizedOverTcx, TyCtxt};
26+
use rustc_middle::ty::{DeducedParamAttrs, GeneratorDiagnosticData, ParameterizedOverTcx, TyCtxt};
2727
use rustc_serialize::opaque::FileEncoder;
2828
use rustc_session::config::SymbolManglingVersion;
2929
use rustc_session::cstore::{CrateDepKind, ForeignModule, LinkagePreference, NativeLib};
@@ -402,6 +402,7 @@ define_tables! {
402402
macro_definition: Table<DefIndex, LazyValue<ast::MacArgs>>,
403403
proc_macro: Table<DefIndex, MacroKind>,
404404
module_reexports: Table<DefIndex, LazyArray<ModChild>>,
405+
deduced_param_attrs: Table<DefIndex, LazyArray<DeducedParamAttrs>>,
405406

406407
trait_impl_trait_tys: Table<DefIndex, LazyValue<FxHashMap<DefId, Ty<'static>>>>,
407408
}

compiler/rustc_middle/src/query/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -2127,4 +2127,9 @@ rustc_queries! {
21272127
) -> Result<(), ErrorGuaranteed> {
21282128
desc { |tcx| "checking assoc const `{}` has the same type as trait item", tcx.def_path_str(key.0.to_def_id()) }
21292129
}
2130+
2131+
query deduced_param_attrs(def_id: DefId) -> &'tcx [ty::DeducedParamAttrs] {
2132+
desc { |tcx| "deducing parameter attributes for {}", tcx.def_path_str(def_id) }
2133+
separate_provide_extern
2134+
}
21302135
}

compiler/rustc_middle/src/ty/codec.rs

+1
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ impl_arena_copy_decoder! {<'tcx>
455455
rustc_span::def_id::DefId,
456456
rustc_span::def_id::LocalDefId,
457457
(rustc_middle::middle::exported_symbols::ExportedSymbol<'tcx>, rustc_middle::middle::exported_symbols::SymbolExportInfo),
458+
ty::DeducedParamAttrs,
458459
}
459460

460461
#[macro_export]

compiler/rustc_middle/src/ty/context.rs

+15
Original file line numberDiff line numberDiff line change
@@ -2954,6 +2954,21 @@ impl<'tcx> TyCtxtAt<'tcx> {
29542954
}
29552955
}
29562956

2957+
/// Parameter attributes that can only be determined by examining the body of a function instead
2958+
/// of just its signature.
2959+
///
2960+
/// These can be useful for optimization purposes when a function is directly called. We compute
2961+
/// them and store them into the crate metadata so that downstream crates can make use of them.
2962+
///
2963+
/// Right now, we only have `read_only`, but `no_capture` and `no_alias` might be useful in the
2964+
/// future.
2965+
#[derive(Clone, Copy, PartialEq, Debug, Default, TyDecodable, TyEncodable, HashStable)]
2966+
pub struct DeducedParamAttrs {
2967+
/// The parameter is marked immutable in the function and contains no `UnsafeCell` (i.e. its
2968+
/// type is freeze).
2969+
pub read_only: bool,
2970+
}
2971+
29572972
// We are comparing types with different invariant lifetimes, so `ptr::eq`
29582973
// won't work for us.
29592974
fn ptr_eq<T, U>(t: *const T, u: *const U) -> bool {

compiler/rustc_middle/src/ty/mod.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ pub use self::consts::{
7878
};
7979
pub use self::context::{
8080
tls, CanonicalUserType, CanonicalUserTypeAnnotation, CanonicalUserTypeAnnotations,
81-
CtxtInterners, DelaySpanBugEmitted, FreeRegionInfo, GeneratorDiagnosticData,
81+
CtxtInterners, DeducedParamAttrs, DelaySpanBugEmitted, FreeRegionInfo, GeneratorDiagnosticData,
8282
GeneratorInteriorTypeCause, GlobalCtxt, Lift, OnDiskCache, TyCtxt, TypeckResults, UserType,
8383
UserTypeAnnotationIndex,
8484
};

compiler/rustc_middle/src/ty/parameterized.rs

+1
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ trivially_parameterized_over_tcx! {
6161
crate::middle::resolve_lifetime::ObjectLifetimeDefault,
6262
crate::mir::ConstQualifs,
6363
ty::AssocItemContainer,
64+
ty::DeducedParamAttrs,
6465
ty::Generics,
6566
ty::ImplPolarity,
6667
ty::ReprOptions,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
//! Deduces supplementary parameter attributes from MIR.
2+
//!
3+
//! Deduced parameter attributes are those that can only be soundly determined by examining the
4+
//! body of the function instead of just the signature. These can be useful for optimization
5+
//! purposes on a best-effort basis. We compute them here and store them into the crate metadata so
6+
//! dependent crates can use them.
7+
8+
use rustc_hir::def_id::DefId;
9+
use rustc_index::bit_set::BitSet;
10+
use rustc_middle::mir::visit::{NonMutatingUseContext, PlaceContext, Visitor};
11+
use rustc_middle::mir::{Body, Local, Location, Operand, Terminator, TerminatorKind, RETURN_PLACE};
12+
use rustc_middle::ty::{self, DeducedParamAttrs, ParamEnv, Ty, TyCtxt};
13+
use rustc_session::config::OptLevel;
14+
use rustc_span::DUMMY_SP;
15+
16+
/// A visitor that determines which arguments have been mutated. We can't use the mutability field
17+
/// on LocalDecl for this because it has no meaning post-optimization.
18+
struct DeduceReadOnly {
19+
/// Each bit is indexed by argument number, starting at zero (so 0 corresponds to local decl
20+
/// 1). The bit is true if the argument may have been mutated or false if we know it hasn't
21+
/// been up to the point we're at.
22+
mutable_args: BitSet<usize>,
23+
}
24+
25+
impl DeduceReadOnly {
26+
/// Returns a new DeduceReadOnly instance.
27+
fn new(arg_count: usize) -> Self {
28+
Self { mutable_args: BitSet::new_empty(arg_count) }
29+
}
30+
}
31+
32+
impl<'tcx> Visitor<'tcx> for DeduceReadOnly {
33+
fn visit_local(&mut self, local: Local, mut context: PlaceContext, _: Location) {
34+
// We're only interested in arguments.
35+
if local == RETURN_PLACE || local.index() > self.mutable_args.domain_size() {
36+
return;
37+
}
38+
39+
// Replace place contexts that are moves with copies. This is safe in all cases except
40+
// function argument position, which we already handled in `visit_terminator()` by using the
41+
// ArgumentChecker. See the comment in that method for more details.
42+
//
43+
// In the future, we might want to move this out into a separate pass, but for now let's
44+
// just do it on the fly because that's faster.
45+
if matches!(context, PlaceContext::NonMutatingUse(NonMutatingUseContext::Move)) {
46+
context = PlaceContext::NonMutatingUse(NonMutatingUseContext::Copy);
47+
}
48+
49+
match context {
50+
PlaceContext::MutatingUse(..)
51+
| PlaceContext::NonMutatingUse(NonMutatingUseContext::Move) => {
52+
// This is a mutation, so mark it as such.
53+
self.mutable_args.insert(local.index() - 1);
54+
}
55+
PlaceContext::NonMutatingUse(..) | PlaceContext::NonUse(..) => {
56+
// Not mutating, so it's fine.
57+
}
58+
}
59+
}
60+
61+
fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, location: Location) {
62+
// OK, this is subtle. Suppose that we're trying to deduce whether `x` in `f` is read-only
63+
// and we have the following:
64+
//
65+
// fn f(x: BigStruct) { g(x) }
66+
// fn g(mut y: BigStruct) { y.foo = 1 }
67+
//
68+
// If, at the generated MIR level, `f` turned into something like:
69+
//
70+
// fn f(_1: BigStruct) -> () {
71+
// let mut _0: ();
72+
// bb0: {
73+
// _0 = g(move _1) -> bb1;
74+
// }
75+
// ...
76+
// }
77+
//
78+
// then it would be incorrect to mark `x` (i.e. `_1`) as `readonly`, because `g`'s write to
79+
// its copy of the indirect parameter would actually be a write directly to the pointer that
80+
// `f` passes. Note that function arguments are the only situation in which this problem can
81+
// arise: every other use of `move` in MIR doesn't actually write to the value it moves
82+
// from.
83+
//
84+
// Anyway, right now this situation doesn't actually arise in practice. Instead, the MIR for
85+
// that function looks like this:
86+
//
87+
// fn f(_1: BigStruct) -> () {
88+
// let mut _0: ();
89+
// let mut _2: BigStruct;
90+
// bb0: {
91+
// _2 = move _1;
92+
// _0 = g(move _2) -> bb1;
93+
// }
94+
// ...
95+
// }
96+
//
97+
// Because of that extra move that MIR construction inserts, `x` (i.e. `_1`) can *in
98+
// practice* safely be marked `readonly`.
99+
//
100+
// To handle the possibility that other optimizations (for example, destination propagation)
101+
// might someday generate MIR like the first example above, we panic upon seeing an argument
102+
// to *our* function that is directly moved into *another* function as an argument. Having
103+
// eliminated that problematic case, we can safely treat moves as copies in this analysis.
104+
//
105+
// In the future, if MIR optimizations cause arguments of a caller to be directly moved into
106+
// the argument of a callee, we can just add that argument to `mutated_args` instead of
107+
// panicking.
108+
//
109+
// Note that, because the problematic MIR is never actually generated, we can't add a test
110+
// case for this.
111+
112+
if let TerminatorKind::Call { ref args, .. } = terminator.kind {
113+
for arg in args {
114+
if let Operand::Move(_) = *arg {
115+
// ArgumentChecker panics if a direct move of an argument from a caller to a
116+
// callee was detected.
117+
//
118+
// If, in the future, MIR optimizations cause arguments to be moved directly
119+
// from callers to callees, change the panic to instead add the argument in
120+
// question to `mutating_uses`.
121+
ArgumentChecker::new(self.mutable_args.domain_size())
122+
.visit_operand(arg, location)
123+
}
124+
}
125+
};
126+
127+
self.super_terminator(terminator, location);
128+
}
129+
}
130+
131+
/// A visitor that simply panics if a direct move of an argument from a caller to a callee was
132+
/// detected.
133+
struct ArgumentChecker {
134+
/// The number of arguments to the calling function.
135+
arg_count: usize,
136+
}
137+
138+
impl ArgumentChecker {
139+
/// Creates a new ArgumentChecker.
140+
fn new(arg_count: usize) -> Self {
141+
Self { arg_count }
142+
}
143+
}
144+
145+
impl<'tcx> Visitor<'tcx> for ArgumentChecker {
146+
fn visit_local(&mut self, local: Local, context: PlaceContext, _: Location) {
147+
// Check to make sure that, if this local is an argument, we didn't move directly from it.
148+
if matches!(context, PlaceContext::NonMutatingUse(NonMutatingUseContext::Move))
149+
&& local != RETURN_PLACE
150+
&& local.index() <= self.arg_count
151+
{
152+
// If, in the future, MIR optimizations cause arguments to be moved directly from
153+
// callers to callees, change this panic to instead add the argument in question to
154+
// `mutating_uses`.
155+
panic!("Detected a direct move from a caller's argument to a callee's argument!")
156+
}
157+
}
158+
}
159+
160+
/// Returns true if values of a given type will never be passed indirectly, regardless of ABI.
161+
fn type_will_always_be_passed_directly<'tcx>(ty: Ty<'tcx>) -> bool {
162+
matches!(
163+
ty.kind(),
164+
ty::Bool
165+
| ty::Char
166+
| ty::Float(..)
167+
| ty::Int(..)
168+
| ty::RawPtr(..)
169+
| ty::Ref(..)
170+
| ty::Slice(..)
171+
| ty::Uint(..)
172+
)
173+
}
174+
175+
/// Returns the deduced parameter attributes for a function.
176+
///
177+
/// Deduced parameter attributes are those that can only be soundly determined by examining the
178+
/// body of the function instead of just the signature. These can be useful for optimization
179+
/// purposes on a best-effort basis. We compute them here and store them into the crate metadata so
180+
/// dependent crates can use them.
181+
pub fn deduced_param_attrs<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> &'tcx [DeducedParamAttrs] {
182+
// This computation is unfortunately rather expensive, so don't do it unless we're optimizing.
183+
// Also skip it in incremental mode.
184+
if tcx.sess.opts.optimize == OptLevel::No || tcx.sess.opts.incremental.is_some() {
185+
return &[];
186+
}
187+
188+
// If the Freeze language item isn't present, then don't bother.
189+
if tcx.lang_items().freeze_trait().is_none() {
190+
return &[];
191+
}
192+
193+
// Codegen won't use this information for anything if all the function parameters are passed
194+
// directly. Detect that and bail, for compilation speed.
195+
let fn_ty = tcx.type_of(def_id);
196+
if matches!(fn_ty.kind(), ty::FnDef(..)) {
197+
if fn_ty
198+
.fn_sig(tcx)
199+
.inputs()
200+
.skip_binder()
201+
.iter()
202+
.cloned()
203+
.all(type_will_always_be_passed_directly)
204+
{
205+
return &[];
206+
}
207+
}
208+
209+
// Don't deduce any attributes for functions that have no MIR.
210+
if !tcx.is_mir_available(def_id) {
211+
return &[];
212+
}
213+
214+
// Deduced attributes for other crates should be read from the metadata instead of via this
215+
// function.
216+
debug_assert!(def_id.is_local());
217+
218+
// Grab the optimized MIR. Analyze it to determine which arguments have been mutated.
219+
let body: &Body<'tcx> = tcx.optimized_mir(def_id);
220+
let mut deduce_read_only = DeduceReadOnly::new(body.arg_count);
221+
deduce_read_only.visit_body(body);
222+
223+
// Set the `readonly` attribute for every argument that we concluded is immutable and that
224+
// contains no UnsafeCells.
225+
//
226+
// FIXME: This is overly conservative around generic parameters: `is_freeze()` will always
227+
// return false for them. For a description of alternatives that could do a better job here,
228+
// see [1].
229+
//
230+
// [1]: https://github.com/rust-lang/rust/pull/103172#discussion_r999139997
231+
let mut deduced_param_attrs = tcx.arena.alloc_from_iter(
232+
body.local_decls.iter().skip(1).take(body.arg_count).enumerate().map(
233+
|(arg_index, local_decl)| DeducedParamAttrs {
234+
read_only: !deduce_read_only.mutable_args.contains(arg_index)
235+
&& local_decl.ty.is_freeze(tcx.at(DUMMY_SP), ParamEnv::reveal_all()),
236+
},
237+
),
238+
);
239+
240+
// Trailing parameters past the size of the `deduced_param_attrs` array are assumed to have the
241+
// default set of attributes, so we don't have to store them explicitly. Pop them off to save a
242+
// few bytes in metadata.
243+
while deduced_param_attrs.last() == Some(&DeducedParamAttrs::default()) {
244+
let last_index = deduced_param_attrs.len() - 1;
245+
deduced_param_attrs = &mut deduced_param_attrs[0..last_index];
246+
}
247+
248+
deduced_param_attrs
249+
}

compiler/rustc_mir_transform/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ mod const_prop_lint;
5656
mod coverage;
5757
mod dead_store_elimination;
5858
mod deaggregator;
59+
mod deduce_param_attrs;
5960
mod deduplicate_blocks;
6061
mod deref_separator;
6162
mod dest_prop;
@@ -139,6 +140,7 @@ pub fn provide(providers: &mut Providers) {
139140
promoted_mir_of_const_arg: |tcx, (did, param_did)| {
140141
promoted_mir(tcx, ty::WithOptConstParam { did, const_param_did: Some(param_did) })
141142
},
143+
deduced_param_attrs: deduce_param_attrs::deduced_param_attrs,
142144
..*providers
143145
};
144146
}

compiler/rustc_query_impl/src/on_disk_cache.rs

+1
Original file line numberDiff line numberDiff line change
@@ -848,6 +848,7 @@ impl_ref_decoder! {<'tcx>
848848
rustc_span::def_id::DefId,
849849
rustc_span::def_id::LocalDefId,
850850
(rustc_middle::middle::exported_symbols::ExportedSymbol<'tcx>, rustc_middle::middle::exported_symbols::SymbolExportInfo),
851+
ty::DeducedParamAttrs,
851852
}
852853

853854
//- ENCODING -------------------------------------------------------------------

0 commit comments

Comments
 (0)