diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 714af977fb5d4..857ee12e01ba5 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -8,8 +8,8 @@ use rustc_session::config::{ FunctionReturn, InliningThreshold, Input, InstrumentCoverage, InstrumentXRay, LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirSpanview, OomStrategy, Options, OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, Passes, Polonius, - ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingVersion, TraitSolver, - WasiExecModel, + ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, SymbolManglingPlugin, SymbolManglingVersion, + TraitSolver, WasiExecModel, }; use rustc_session::lint::Level; use rustc_session::search_paths::SearchPath; @@ -818,6 +818,7 @@ fn test_unstable_options_tracking_hash() { tracked!(split_lto_unit, Some(true)); tracked!(src_hash_algorithm, Some(SourceFileHashAlgorithm::Sha1)); tracked!(stack_protector, StackProtector::All); + tracked!(symbol_mangling_plugin, SymbolManglingPlugin::new()); tracked!(teach, true); tracked!(thinlto, Some(true)); tracked!(thir_unsafeck, true); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index e694e150b314b..e4e88e9444c3b 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -21,6 +21,7 @@ use rustc_target::abi::Align; use rustc_target::spec::LinkSelfContainedComponents; use rustc_target::spec::{PanicStrategy, RelocModel, SanitizerSet, SplitDebuginfo}; use rustc_target::spec::{Target, TargetTriple, TargetWarnings, TARGETS}; +use std::cmp::PartialEq; use std::collections::btree_map::{ Iter as BTreeMapIter, Keys as BTreeMapKeysIter, Values as BTreeMapValuesIter, }; @@ -28,7 +29,7 @@ use std::collections::{BTreeMap, BTreeSet}; use std::ffi::OsStr; use std::fmt; use std::fs; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::iter; use 
std::path::{Path, PathBuf};
 use std::str::{self, FromStr};
@@ -364,6 +365,138 @@ pub enum SymbolManglingVersion {
     V0,
 }
 
+/// Configuration of the symbol mangling plugin selected by `-Z symbol_mangling_plugin`.
+///
+/// Only the `hasher` plugin exists; it replaces the mangled names of symbols from the
+/// selected crates with a short hash-based name.
+#[derive(Clone, Debug)]
+pub struct SymbolManglingPlugin {
+    /// Crate names that are matched exactly.
+    fulls: FxHashSet<String>,
+    /// Crate name prefixes (given on the command line as `prefix*`).
+    prefixes: Vec<String>,
+    /// Extra text mixed into the hash of every rewritten symbol.
+    salt: String,
+    /// Output format: `1` keeps the item name, `2` (the default) emits only crate and hash.
+    level: u8,
+    /// When `true`, the selection is inverted: listed crates are skipped instead of hashed.
+    excluded: bool,
+}
+
+impl SymbolManglingPlugin {
+    /// Creates the disabled default configuration (no crates selected, level 2, empty salt).
+    pub fn new() -> Self {
+        Self {
+            fulls: FxHashSet::default(),
+            prefixes: Vec::new(),
+            salt: String::new(),
+            level: 2,
+            excluded: false,
+        }
+    }
+
+    /// Returns `true` if any crate can be selected for rewriting.
+    pub fn enabled(&self) -> bool {
+        !self.fulls.is_empty() || !self.prefixes.is_empty() || self.excluded
+    }
+
+    /// Applies the comma-separated `hasher` arguments in `args` to `self`.
+    ///
+    /// Returns `false` and leaves `self` untouched if any argument is invalid.
+    pub fn hasher_enable(&mut self, args: &str) -> bool {
+        // Parse into a scratch copy so a failure half-way through cannot leave `self` modified.
+        let mut updated = self.clone();
+        if !updated.hasher_reinit(args) {
+            return false;
+        }
+        *self = updated;
+        true
+    }
+
+    /// Returns the `(salt, level)` pair used when hashing and encoding a symbol.
+    pub fn hasher_args(&self) -> (&str, u8) {
+        (&self.salt, self.level)
+    }
+
+    /// Returns `true` if symbols of the crate named `val` must be rewritten.
+    ///
+    /// A crate matches when it is listed exactly or starts with one of the prefixes; with
+    /// `excluded=true` the result is inverted.
+    pub fn hasher_contains(&self, val: &str) -> bool {
+        let listed = self.fulls.contains(val)
+            || self.prefixes.iter().any(|prefix| val.starts_with(prefix.as_str()));
+        listed != self.excluded
+    }
+
+    /// Parses `args`, updating `self` as it goes; returns `false` at the first invalid argument.
+    fn hasher_reinit(&mut self, args: &str) -> bool {
+        for arg in args.split(',') {
+            // Split at the first `=` only, so that a salt may itself contain `=`.
+            if let Some((name, value)) = arg.split_once('=') {
+                match (name, value) {
+                    ("salt", _) => self.salt = value.to_string(),
+                    ("level", "1") => self.level = 1,
+                    ("level", "2") => self.level = 2,
+                    ("excluded", "true") => self.excluded = true,
+                    ("excluded", "false") => self.excluded = false,
+                    _ => return false,
+                }
+            } else if let Some(prefix) = arg.strip_suffix('*') {
+                self.prefixes.push(prefix.to_string());
+            } else if !arg.is_empty() {
+                // An empty argument (e.g. a trailing comma) does not name a crate.
+                self.fulls.insert(arg.to_string());
+            }
+        }
+        true
+    }
+
+    /// Returns all exact names (sorted) followed by all prefixes (sorted).
+    ///
+    /// Used to hash and compare configurations independently of insertion order.
+    fn to_vec(&self) -> Vec<&str> {
+        // The iteration order of the set is unspecified, but the result is sorted right away.
+        #[allow(rustc::potential_query_instability)]
+        let mut ret: Vec<&str> = self.fulls.iter().map(String::as_str).collect();
+        ret.sort_unstable();
+        let split = ret.len();
+        ret.extend(self.prefixes.iter().map(String::as_str));
+        ret[split..].sort_unstable();
+        ret
+    }
+}
+
+impl Hash for SymbolManglingPlugin {
+    /// Hashes the sorted names first so that equal configurations hash equally regardless of
+    /// the set's iteration order.
+    fn hash<H>(&self, hasher: &mut H)
+    where
+        H: Hasher,
+    {
+        for val in self.to_vec() {
+            val.hash(hasher);
+        }
+        self.fulls.len().hash(hasher);
+        self.prefixes.len().hash(hasher);
+        self.salt.hash(hasher);
+        self.level.hash(hasher);
+        self.excluded.hash(hasher);
+    }
+}
+
+impl PartialEq for SymbolManglingPlugin {
+    /// Two configurations are equal when all scalar fields match and they select the same
+    /// exact names and the same prefixes.
+    fn eq(&self, other: &Self) -> bool {
+        self.excluded == other.excluded
+            && self.level == other.level
+            && self.salt == other.salt
+            && self.fulls.len() == other.fulls.len()
+            && self.prefixes.len() == other.prefixes.len()
+            && self.to_vec() == other.to_vec()
+    }
+}
+
 #[derive(Clone, Copy, Debug, PartialEq, Hash)]
 pub enum DebugInfo {
     None,
@@ -2717,6 +2850,12 @@ pub fn build_session_options(
             );
         }
 
+        if unstable_opts.symbol_mangling_plugin.enabled() {
+            handler.early_error(
+                "option `-C instrument-coverage` is not compatible with `-Z symbol_mangling_plugin`"
+            );
+        }
+
         // `-C instrument-coverage` implies `-C symbol-mangling-version=v0` - to ensure consistent
         // and reversible name mangling. 
Note, LLVM coverage tools can analyze coverage over
         // multiple runs, including some changes to source code; so mangled names must be consistent
@@ -3176,8 +3303,8 @@ pub(crate) mod dep_tracking {
         ErrorOutputType, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay,
         LinkerPluginLto, LocationDetail, LtoCli, OomStrategy, OptLevel, OutFileName, OutputType,
         OutputTypes, Polonius, RemapPathScopeComponents, ResolveDocLinks, SourceFileHashAlgorithm,
-        SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion, TraitSolver, TrimmedDefPaths,
-        WasiExecModel,
+        SplitDwarfKind, SwitchWithOptPath, SymbolManglingPlugin, SymbolManglingVersion, TraitSolver,
+        TrimmedDefPaths, WasiExecModel,
     };
     use crate::lint;
     use crate::utils::NativeLib;
@@ -3268,6 +3395,7 @@ pub(crate) mod dep_tracking {
         SplitDwarfKind,
         StackProtector,
         SwitchWithOptPath,
+        SymbolManglingPlugin,
         SymbolManglingVersion,
         RemapPathScopeComponents,
         SourceFileHashAlgorithm,
diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 7a6108bfbe24c..89a0595cd8677 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -402,6 +402,8 @@ mod desc {
     pub const parse_switch_with_opt_path: &str =
         "an optional path to the profiling data output directory";
     pub const parse_merge_functions: &str = "one of: `disabled`, `trampolines`, or `aliases`";
+    pub const parse_symbol_mangling_plugin: &str = "configuration parameters of the hasher \
+        plugin: `hasher:<crate>[*],...[,salt=<value>][,level=1|2][,excluded=true|false]`";
     pub const parse_symbol_mangling_version: &str = "either `legacy` or `v0` (RFC 2603)";
     pub const parse_src_file_hash: &str = "either `md5` or `sha1`";
     pub const parse_relocation_model: &str =
@@ -1171,6 +1173,20 @@ mod parse {
         true
     }
 
+    /// Parses `-Z symbol_mangling_plugin=<plugin>:<args>`; `hasher` is the only known plugin.
+    ///
+    /// A missing value leaves `slot` untouched and is accepted.
+    pub(crate) fn parse_symbol_mangling_plugin(
+        slot: &mut SymbolManglingPlugin,
+        v: Option<&str>,
+    ) -> bool {
+        let Some(v) = v else { return true };
+        match v.strip_prefix("hasher:") {
+            Some(args) => slot.hasher_enable(args),
+            None => false,
+        }
+    }
+
     pub(crate) fn parse_src_file_hash(
         slot: &mut Option<SourceFileHashAlgorithm>,
         v: Option<&str>,
@@ -1880,6 +1896,10 @@ written to standard error output)"),
         "prefer dynamic linking to static linking for staticlibs (default: no)"),
     strict_init_checks: bool = (false, parse_bool, [TRACKED],
         "control if mem::uninitialized and mem::zeroed panic on more UB"),
+    symbol_mangling_plugin: SymbolManglingPlugin = (SymbolManglingPlugin::new(),
+        parse_symbol_mangling_plugin, [TRACKED],
+        "the hasher plugin controls symbol scope and hash parameter that require hash \
+        (default: hash is not required)"),
     #[rustc_lint_opt_deny_field_access("use `Session::teach` instead of this field")]
     teach: bool = (false, parse_bool, [TRACKED],
         "show extended diagnostic help (default: no)"),
diff --git a/compiler/rustc_symbol_mangling/src/lib.rs b/compiler/rustc_symbol_mangling/src/lib.rs
index 311b94d9e0e6f..0719bcaac6df6 100644
--- a/compiler/rustc_symbol_mangling/src/lib.rs
+++ b/compiler/rustc_symbol_mangling/src/lib.rs
@@ -114,6 +114,7 @@ use rustc_session::config::SymbolManglingVersion;
 
 mod legacy;
+mod plugin;
 mod v0;
 
 pub mod errors;
 pub mod test;
@@ -268,6 +269,12 @@ fn compute_symbol_name<'tcx>(
         SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
     };
 
+    let symbol = if tcx.sess.opts.unstable_opts.symbol_mangling_plugin.enabled() {
+        plugin::process(tcx, symbol, def_id)
+    } else {
+        symbol
+    };
+
     debug_assert!(
         rustc_demangle::try_demangle(&symbol).is_ok(),
         "compute_symbol_name: `{symbol}` cannot be demangled"
diff --git a/compiler/rustc_symbol_mangling/src/plugin.rs b/compiler/rustc_symbol_mangling/src/plugin.rs
new file mode 100644
index 0000000000000..4dd73e861cc0b
--- /dev/null
+++ b/compiler/rustc_symbol_mangling/src/plugin.rs
@@ -0,0 +1,53 @@
+use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
+use rustc_hir::def_id::DefId;
+use rustc_middle::ty::TyCtxt;
+
+/// Rewrites `symbol` for crates selected by the `hasher` plugin of `-Z symbol_mangling_plugin`.
+///
+/// Symbols whose defining crate is not selected are returned unchanged. Otherwise the mangled
+/// name and the configured salt are hashed, and the result is encoded according to `level`.
+pub(super) fn process<'tcx>(tcx: TyCtxt<'tcx>, symbol: String, def_id: DefId) -> String {
+    let crate_name = tcx.crate_name(def_id.krate);
+    let crate_name = crate_name.as_str();
+    let plugin = &tcx.sess.opts.unstable_opts.symbol_mangling_plugin;
+    if !plugin.hasher_contains(crate_name) {
+        return symbol;
+    }
+
+    let (salt, level) = plugin.hasher_args();
+
+    // Hash the mangled name together with the salt so that different salts yield different names.
+    let hash = tcx.with_stable_hashing_context(|mut hcx| {
+        let mut hasher = StableHasher::new();
+        symbol.hash_stable(&mut hcx, &mut hasher);
+        salt.hash_stable(&mut hcx, &mut hasher);
+        hasher.finish::<Hash64>().as_u64()
+    });
+
+    match level {
+        1 => encode_1(tcx, crate_name, hash, def_id),
+        _ => encode_2(tcx, crate_name, hash, def_id),
+    }
+}
+
+/// Encodes level 1 names as `_ZN{length}{crate}.{item}.{hash32}E`.
+///
+/// Definitions without an item name fall back to the level 2 encoding.
+fn encode_1<'tcx>(tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, def_id: DefId) -> String {
+    let Some(item_name) = tcx.opt_item_name(def_id) else {
+        return encode_2(tcx, crate_name, hash, def_id);
+    };
+    // Use only the upper 32 bits so the hash is always printed as exactly 8 hex digits.
+    let ident = format!("{crate_name}.{}.{:08x}", item_name.as_str(), hash >> 32);
+    // The length prefix covers the identifier only, not the terminating `E`.
+    format!("_ZN{}{ident}E", ident.len())
+}
+
+/// Encodes level 2 names as `_ZN{length}{crate}.{hash64}E`.
+///
+/// This is the default format.
+fn encode_2<'tcx>(_tcx: TyCtxt<'tcx>, crate_name: &str, hash: u64, _def_id: DefId) -> String {
+    let ident = format!("{crate_name}.{hash:016x}");
+    // The length prefix covers the identifier only, not the terminating `E`.
+    format!("_ZN{}{ident}E", ident.len())
+}
diff --git a/src/doc/unstable-book/src/compiler-flags/symbol_mangling_plugin.md b/src/doc/unstable-book/src/compiler-flags/symbol_mangling_plugin.md
new file mode 100644
index 0000000000000..ddd83a8d56782
--- /dev/null
+++ b/src/doc/unstable-book/src/compiler-flags/symbol_mangling_plugin.md
@@ -0,0 +1,20 @@
+# `symbol_mangling_plugin`
+
+Instead of defining a new mangling rule, it provides a plug-in for reprocessing mangling symbol names.
+
+The average length of symbol names in the rust standard library is about 100 bytes, while the average length of symbol names in the C++ standard library is about 65 bytes. 
In some embedded environments where dynamic libraries are widely used, the symbol name space of Rust dynamic libraries has become one of the key bottlenecks of applications. The plug-in mechanism provided here can help eliminate this bottleneck.
+
+The plug-in information is not written into the generated binary file. Therefore, you need to ensure that the plug-in configuration is consistent in multiple build environments. For example, the configuration parameters of the plug-in must be consistent in the build project of the dynamic library and the build project that depends on the dynamic library. Otherwise, an `undefined symbol` or `undefined version` error occurs.
+
+The value of this parameter is in the format of `-Z symbol_mangling_plugin=<plugin>:<args>`. Currently only one plug-in is available: `hasher`.
+
+## Hasher plug-in
+
+The configuration format is `-Z symbol_mangling_plugin=hasher:<crate>[*],...[,excluded=true|false][,level=1|2][,salt=<value>]`.
+
+In the preceding information, `<crate>` matches the name of the crate. If the name ends with `*`, the prefix is matched. The hasher plug-in only reprocesses the symbol names in (or not in, if `excluded=true`) the specified crates. The hasher plug-in uses the hash value to replace the complete symbol names, compressing the symbol name space and avoiding symbol conflicts.
+
+If `level=1`, the new symbol name format is `_ZN{length}{crate}.{item}.{hash32}E`. Otherwise, the new symbol name format is `_ZN{length}{crate}.{hash64}E`, which is the default format.
+
+`salt` can specify a salt value for hash calculation, which reduces security risks caused by malicious replacement of dynamic libraries and increases security.
+