Skip to content

Commit ebf0cf7

Browse files
committed
Auto merge of rust-lang#137586 - nnethercote:SetImpliedBits, r=bjorn3
Speed up target feature computation. The LLVM backend calls `LLVMRustHasFeature` twice for every feature. In short-running rustc invocations, this accounts for a surprising amount of work. r? `@bjorn3`
2 parents 705421b + cee3114 commit ebf0cf7

File tree

9 files changed

+120
-113
lines changed

9 files changed

+120
-113
lines changed

compiler/rustc_codegen_cranelift/src/lib.rs

+6-7
Original file line numberDiff line numberDiff line change
@@ -176,13 +176,9 @@ impl CodegenBackend for CraneliftCodegenBackend {
176176
}
177177
}
178178

179-
fn target_features_cfg(
180-
&self,
181-
sess: &Session,
182-
_allow_unstable: bool,
183-
) -> Vec<rustc_span::Symbol> {
179+
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
184180
// FIXME: return the actually used target features. This is necessary for #[cfg(target_feature)]
185-
if sess.target.arch == "x86_64" && sess.target.os != "none" {
181+
let target_features = if sess.target.arch == "x86_64" && sess.target.os != "none" {
186182
// x86_64 mandates SSE2 support and rustc requires the x87 feature to be enabled
187183
vec![sym::fsxr, sym::sse, sym::sse2, Symbol::intern("x87")]
188184
} else if sess.target.arch == "aarch64" {
@@ -196,7 +192,10 @@ impl CodegenBackend for CraneliftCodegenBackend {
196192
}
197193
} else {
198194
vec![]
199-
}
195+
};
196+
// FIXME do `unstable_target_features` properly
197+
let unstable_target_features = target_features.clone();
198+
(target_features, unstable_target_features)
200199
}
201200

202201
fn print_version(&self) {

compiler/rustc_codegen_gcc/src/gcc_util.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ pub(crate) fn global_gcc_features(sess: &Session, diagnostics: bool) -> Vec<Stri
4848
for feature in sess.opts.cg.target_feature.split(',') {
4949
if let Some(feature) = feature.strip_prefix('+') {
5050
all_rust_features.extend(
51-
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
51+
UnordSet::from(sess.target.implied_target_features(feature))
5252
.to_sorted_stable_ord()
5353
.iter()
5454
.map(|&&s| (true, s)),

compiler/rustc_codegen_gcc/src/lib.rs

+37-31
Original file line numberDiff line numberDiff line change
@@ -259,8 +259,8 @@ impl CodegenBackend for GccCodegenBackend {
259259
.join(sess)
260260
}
261261

262-
fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
263-
target_features_cfg(sess, allow_unstable, &self.target_info)
262+
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
263+
target_features_cfg(sess, &self.target_info)
264264
}
265265
}
266266

@@ -486,35 +486,41 @@ fn to_gcc_opt_level(optlevel: Option<OptLevel>) -> OptimizationLevel {
486486
/// Returns the features that should be set in `cfg(target_feature)`.
487487
fn target_features_cfg(
488488
sess: &Session,
489-
allow_unstable: bool,
490489
target_info: &LockedTargetInfo,
491-
) -> Vec<Symbol> {
490+
) -> (Vec<Symbol>, Vec<Symbol>) {
492491
// TODO(antoyo): use global_gcc_features.
493-
sess.target
494-
.rust_target_features()
495-
.iter()
496-
.filter_map(|&(feature, gate, _)| {
497-
if allow_unstable
498-
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
499-
{
500-
Some(feature)
501-
} else {
502-
None
503-
}
504-
})
505-
.filter(|feature| {
506-
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
507-
if *feature == "neon" {
508-
return false;
509-
}
510-
target_info.cpu_supports(feature)
511-
/*
512-
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
513-
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
514-
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
515-
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
516-
*/
517-
})
518-
.map(Symbol::intern)
519-
.collect()
492+
let f = |allow_unstable| {
493+
sess.target
494+
.rust_target_features()
495+
.iter()
496+
.filter_map(|&(feature, gate, _)| {
497+
if allow_unstable
498+
|| (gate.in_cfg()
499+
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
500+
{
501+
Some(feature)
502+
} else {
503+
None
504+
}
505+
})
506+
.filter(|feature| {
507+
// TODO: we disable Neon for now since we don't support the LLVM intrinsics for it.
508+
if *feature == "neon" {
509+
return false;
510+
}
511+
target_info.cpu_supports(feature)
512+
/*
513+
adx, aes, avx, avx2, avx512bf16, avx512bitalg, avx512bw, avx512cd, avx512dq, avx512er, avx512f, avx512fp16, avx512ifma,
514+
avx512pf, avx512vbmi, avx512vbmi2, avx512vl, avx512vnni, avx512vp2intersect, avx512vpopcntdq,
515+
bmi1, bmi2, cmpxchg16b, ermsb, f16c, fma, fxsr, gfni, lzcnt, movbe, pclmulqdq, popcnt, rdrand, rdseed, rtm,
516+
sha, sse, sse2, sse3, sse4.1, sse4.2, sse4a, ssse3, tbm, vaes, vpclmulqdq, xsave, xsavec, xsaveopt, xsaves
517+
*/
518+
})
519+
.map(Symbol::intern)
520+
.collect()
521+
};
522+
523+
let target_features = f(false);
524+
let unstable_target_features = f(true);
525+
(target_features, unstable_target_features)
520526
}

compiler/rustc_codegen_llvm/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,8 @@ impl CodegenBackend for LlvmCodegenBackend {
341341
llvm_util::print_version();
342342
}
343343

344-
fn target_features_cfg(&self, sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
345-
target_features_cfg(sess, allow_unstable)
344+
fn target_features_cfg(&self, sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
345+
target_features_cfg(sess)
346346
}
347347

348348
fn codegen_crate<'tcx>(

compiler/rustc_codegen_llvm/src/llvm_util.rs

+58-57
Original file line numberDiff line numberDiff line change
@@ -306,45 +306,44 @@ pub(crate) fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> Option<LLVMFea
306306
/// Must express features in the way Rust understands them.
307307
///
308308
/// We do not have to worry about RUSTC_SPECIFIC_FEATURES here, those are handled outside codegen.
309-
pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<Symbol> {
310-
let mut features: FxHashSet<Symbol> = Default::default();
311-
309+
pub(crate) fn target_features_cfg(sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
312310
// Add base features for the target.
313311
// We do *not* add the -Ctarget-features there, and instead duplicate the logic for that below.
314312
// The reason is that if LLVM considers a feature implied but we do not, we don't want that to
315313
// show up in `cfg`. That way, `cfg` is entirely under our control -- except for the handling of
316-
// the target CPU, that is still expanded to target features (with all their implied features) by
317-
// LLVM.
314+
// the target CPU, that is still expanded to target features (with all their implied features)
315+
// by LLVM.
318316
let target_machine = create_informational_target_machine(sess, true);
319-
// Compute which of the known target features are enabled in the 'base' target machine.
320-
// We only consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
321-
features.extend(
322-
sess.target
323-
.rust_target_features()
324-
.iter()
325-
.filter(|(feature, _, _)| {
326-
// skip checking special features, as LLVM may not understand them
327-
if RUSTC_SPECIAL_FEATURES.contains(feature) {
328-
return true;
329-
}
330-
// check that all features in a given smallvec are enabled
331-
if let Some(feat) = to_llvm_features(sess, feature) {
332-
for llvm_feature in feat {
333-
let cstr = SmallCStr::new(llvm_feature);
334-
if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) }
335-
{
336-
return false;
337-
}
317+
// Compute which of the known target features are enabled in the 'base' target machine. We only
318+
// consider "supported" features; "forbidden" features are not reflected in `cfg` as of now.
319+
let mut features: FxHashSet<Symbol> = sess
320+
.target
321+
.rust_target_features()
322+
.iter()
323+
.filter(|(feature, _, _)| {
324+
// skip checking special features, as LLVM may not understand them
325+
if RUSTC_SPECIAL_FEATURES.contains(feature) {
326+
return true;
327+
}
328+
if let Some(feat) = to_llvm_features(sess, feature) {
329+
for llvm_feature in feat {
330+
let cstr = SmallCStr::new(llvm_feature);
331+
// `LLVMRustHasFeature` is moderately expensive. On targets with many
332+
// features (e.g. x86) these calls take a non-trivial fraction of runtime
333+
// when compiling very small programs.
334+
if !unsafe { llvm::LLVMRustHasFeature(target_machine.raw(), cstr.as_ptr()) } {
335+
return false;
338336
}
339-
true
340-
} else {
341-
false
342337
}
343-
})
344-
.map(|(feature, _, _)| Symbol::intern(feature)),
345-
);
338+
true
339+
} else {
340+
false
341+
}
342+
})
343+
.map(|(feature, _, _)| Symbol::intern(feature))
344+
.collect();
346345

347-
// Add enabled features
346+
// Add enabled and remove disabled features.
348347
for (enabled, feature) in
349348
sess.opts.cg.target_feature.split(',').filter_map(|s| match s.chars().next() {
350349
Some('+') => Some((true, Symbol::intern(&s[1..]))),
@@ -360,7 +359,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
360359
#[allow(rustc::potential_query_instability)]
361360
features.extend(
362361
sess.target
363-
.implied_target_features(std::iter::once(feature.as_str()))
362+
.implied_target_features(feature.as_str())
364363
.iter()
365364
.map(|s| Symbol::intern(s)),
366365
);
@@ -371,11 +370,7 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
371370
// `features.contains` below.
372371
#[allow(rustc::potential_query_instability)]
373372
features.retain(|f| {
374-
if sess
375-
.target
376-
.implied_target_features(std::iter::once(f.as_str()))
377-
.contains(&feature.as_str())
378-
{
373+
if sess.target.implied_target_features(f.as_str()).contains(&feature.as_str()) {
379374
// If `f` implies `feature`, then `!feature` implies `!f`, so we have to
380375
// remove `f`. (This is the standard logical contraposition principle.)
381376
false
@@ -387,25 +382,31 @@ pub(crate) fn target_features_cfg(sess: &Session, allow_unstable: bool) -> Vec<S
387382
}
388383
}
389384

390-
// Filter enabled features based on feature gates
391-
sess.target
392-
.rust_target_features()
393-
.iter()
394-
.filter_map(|(feature, gate, _)| {
395-
// The `allow_unstable` set is used by rustc internally to determined which target
396-
// features are truly available, so we want to return even perma-unstable "forbidden"
397-
// features.
398-
if allow_unstable
399-
|| (gate.in_cfg() && (sess.is_nightly_build() || gate.requires_nightly().is_none()))
400-
{
401-
Some(*feature)
402-
} else {
403-
None
404-
}
405-
})
406-
.filter(|feature| features.contains(&Symbol::intern(feature)))
407-
.map(|feature| Symbol::intern(feature))
408-
.collect()
385+
// Filter enabled features based on feature gates.
386+
let f = |allow_unstable| {
387+
sess.target
388+
.rust_target_features()
389+
.iter()
390+
.filter_map(|(feature, gate, _)| {
391+
// The `allow_unstable` set is used by rustc internally to determine which target
392+
// features are truly available, so we want to return even perma-unstable
393+
// "forbidden" features.
394+
if allow_unstable
395+
|| (gate.in_cfg()
396+
&& (sess.is_nightly_build() || gate.requires_nightly().is_none()))
397+
{
398+
Some(Symbol::intern(feature))
399+
} else {
400+
None
401+
}
402+
})
403+
.filter(|feature| features.contains(&feature))
404+
.collect()
405+
};
406+
407+
let target_features = f(false);
408+
let unstable_target_features = f(true);
409+
(target_features, unstable_target_features)
409410
}
410411

411412
pub(crate) fn print_version() {
@@ -682,7 +683,7 @@ pub(crate) fn global_llvm_features(
682683
for feature in sess.opts.cg.target_feature.split(',') {
683684
if let Some(feature) = feature.strip_prefix('+') {
684685
all_rust_features.extend(
685-
UnordSet::from(sess.target.implied_target_features(std::iter::once(feature)))
686+
UnordSet::from(sess.target.implied_target_features(feature))
686687
.to_sorted_stable_ord()
687688
.iter()
688689
.map(|&&s| (true, s)),

compiler/rustc_codegen_ssa/src/target_features.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ pub(crate) fn provide(providers: &mut Providers) {
190190
},
191191
implied_target_features: |tcx, feature: Symbol| {
192192
let feature = feature.as_str();
193-
UnordSet::from(tcx.sess.target.implied_target_features(std::iter::once(feature)))
193+
UnordSet::from(tcx.sess.target.implied_target_features(feature))
194194
.into_sorted_stable_ord()
195195
.into_iter()
196196
.map(|s| Symbol::intern(s))

compiler/rustc_codegen_ssa/src/traits/backend.rs

+6-3
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,13 @@ pub trait CodegenBackend {
4545

4646
fn print(&self, _req: &PrintRequest, _out: &mut String, _sess: &Session) {}
4747

48-
/// Returns the features that should be set in `cfg(target_features)`.
48+
/// Returns two feature sets:
49+
/// - The first has the features that should be set in `cfg(target_feature)`.
50+
/// - The second is like the first, but also includes unstable features.
51+
///
4952
/// RUSTC_SPECIFIC_FEATURES should be skipped here, those are handled outside codegen.
50-
fn target_features_cfg(&self, _sess: &Session, _allow_unstable: bool) -> Vec<Symbol> {
51-
vec![]
53+
fn target_features_cfg(&self, _sess: &Session) -> (Vec<Symbol>, Vec<Symbol>) {
54+
(vec![], vec![])
5255
}
5356

5457
fn print_passes(&self) {}

compiler/rustc_interface/src/util.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,11 @@ pub(crate) fn add_configuration(
3939
) {
4040
let tf = sym::target_feature;
4141

42-
let unstable_target_features = codegen_backend.target_features_cfg(sess, true);
43-
sess.unstable_target_features.extend(unstable_target_features.iter().cloned());
42+
let (target_features, unstable_target_features) = codegen_backend.target_features_cfg(sess);
4443

45-
let target_features = codegen_backend.target_features_cfg(sess, false);
46-
sess.target_features.extend(target_features.iter().cloned());
44+
sess.unstable_target_features.extend(unstable_target_features.iter().copied());
45+
46+
sess.target_features.extend(target_features.iter().copied());
4747

4848
cfg.extend(target_features.into_iter().map(|feat| (tf, Some(feat))));
4949

compiler/rustc_target/src/target_features.rs

+5-7
Original file line numberDiff line numberDiff line change
@@ -768,17 +768,15 @@ impl Target {
768768
}
769769
}
770770

771-
pub fn implied_target_features<'a>(
772-
&self,
773-
base_features: impl Iterator<Item = &'a str>,
774-
) -> FxHashSet<&'a str> {
771+
// Note: the returned set includes `base_feature`.
772+
pub fn implied_target_features<'a>(&self, base_feature: &'a str) -> FxHashSet<&'a str> {
775773
let implied_features =
776774
self.rust_target_features().iter().map(|(f, _, i)| (f, i)).collect::<FxHashMap<_, _>>();
777775

778-
// implied target features have their own implied target features, so we traverse the
779-
// map until there are no more features to add
776+
// Implied target features have their own implied target features, so we traverse the
777+
// map until there are no more features to add.
780778
let mut features = FxHashSet::default();
781-
let mut new_features = base_features.collect::<Vec<&str>>();
779+
let mut new_features = vec![base_feature];
782780
while let Some(new_feature) = new_features.pop() {
783781
if features.insert(new_feature) {
784782
if let Some(implied_features) = implied_features.get(&new_feature) {

0 commit comments

Comments
 (0)