Skip to content

Commit 0d7b2fb

Browse files
committed
Auto merge of rust-lang#123441 - saethlin:fixed-len-file-names, r=oli-obk
Stabilize the size of incr comp object file names The current implementation does not produce stable-length paths, and we create the paths in a way that makes our allocation behavior is nondeterministic. I think `@eddyb` fixed a number of other cases like this in the past, and this PR fixes another one. Whether that actually matters I have no idea, but we still have bimodal behavior in rustc-perf and the non-uniformity in `find` and `ls` was bothering me. I've also removed the truncation of the mangled CGU names. Before this PR incr comp paths look like this: ``` target/debug/incremental/scratch-38izrrq90cex7/s-gux6gz0ow8-1ph76gg-ewe1xj434l26w9up5bedsojpd/261xgo1oqnd90ry5.o ``` And after, they look like this: ``` target/debug/incremental/scratch-035omutqbfkbw/s-gux6borni0-16r3v1j-6n64tmwqzchtgqzwwim5amuga/55v2re42sztc8je9bva6g8ft3.o ``` On the one hand, I'm sure this will break some people's builds because they're on Windows and only a few bytes from the path length limit. But if we're that seriously worried about the length of our file names, I have some other ideas on how to make them smaller. And last time I deleted some hash truncations from the compiler, there was a huge drop in the number if incremental compilation ICEs that were reported: rust-lang#110367 --- Upon further reading, this PR actually fixes a bug. This comment says the CGU names are supposed to be a fixed-length hash, and before this PR they aren't: https://github.com/rust-lang/rust/blob/ca7d34efa94afe271accf2bd3d44152a5bd6fff1/compiler/rustc_monomorphize/src/partitioning.rs#L445-L448
2 parents d6d3b34 + 6ee3713 commit 0d7b2fb

File tree

9 files changed

+145
-70
lines changed

9 files changed

+145
-70
lines changed

compiler/rustc_codegen_gcc/src/context.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ use gccjit::{
66
use rustc_codegen_ssa::base::wants_msvc_seh;
77
use rustc_codegen_ssa::errors as ssa_errors;
88
use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods};
9-
use rustc_data_structures::base_n;
9+
use rustc_data_structures::base_n::ToBaseN;
10+
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
1011
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
1112
use rustc_middle::mir::mono::CodegenUnit;
1213
use rustc_middle::span_bug;
@@ -621,7 +622,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> {
621622
let mut name = String::with_capacity(prefix.len() + 6);
622623
name.push_str(prefix);
623624
name.push_str(".");
624-
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
625+
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
625626
name
626627
}
627628
}

compiler/rustc_codegen_llvm/src/context.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ use crate::value::Value;
1111
use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh};
1212
use rustc_codegen_ssa::errors as ssa_errors;
1313
use rustc_codegen_ssa::traits::*;
14-
use rustc_data_structures::base_n;
14+
use rustc_data_structures::base_n::ToBaseN;
15+
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
1516
use rustc_data_structures::fx::FxHashMap;
1617
use rustc_data_structures::small_c_str::SmallCStr;
1718
use rustc_hir::def_id::DefId;
@@ -1015,7 +1016,7 @@ impl CodegenCx<'_, '_> {
10151016
let mut name = String::with_capacity(prefix.len() + 6);
10161017
name.push_str(prefix);
10171018
name.push('.');
1018-
base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name);
1019+
name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY));
10191020
name
10201021
}
10211022
}
+90-24
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/// Converts unsigned integers into a string representation with some base.
22
/// Bases up to and including 36 can be used for case-insensitive things.
3-
use std::str;
3+
use std::ascii;
4+
use std::fmt;
45

56
#[cfg(test)]
67
mod tests;
@@ -9,36 +10,101 @@ pub const MAX_BASE: usize = 64;
910
pub const ALPHANUMERIC_ONLY: usize = 62;
1011
pub const CASE_INSENSITIVE: usize = 36;
1112

12-
const BASE_64: &[u8; MAX_BASE] =
13-
b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
13+
const BASE_64: [ascii::Char; MAX_BASE] = {
14+
let bytes = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ@$";
15+
let Some(ascii) = bytes.as_ascii() else { panic!() };
16+
*ascii
17+
};
1418

15-
#[inline]
16-
pub fn push_str(mut n: u128, base: usize, output: &mut String) {
17-
debug_assert!(base >= 2 && base <= MAX_BASE);
18-
let mut s = [0u8; 128];
19-
let mut index = s.len();
19+
pub struct BaseNString {
20+
start: usize,
21+
buf: [ascii::Char; 128],
22+
}
23+
24+
impl std::ops::Deref for BaseNString {
25+
type Target = str;
2026

21-
let base = base as u128;
27+
fn deref(&self) -> &str {
28+
self.buf[self.start..].as_str()
29+
}
30+
}
31+
32+
impl AsRef<str> for BaseNString {
33+
fn as_ref(&self) -> &str {
34+
self.buf[self.start..].as_str()
35+
}
36+
}
37+
38+
impl fmt::Display for BaseNString {
39+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40+
f.write_str(self)
41+
}
42+
}
43+
44+
// This trait just lets us reserve the exact right amount of space when doing fixed-length
45+
// case-insensitve encoding. Add any impls you need.
46+
pub trait ToBaseN: Into<u128> {
47+
fn encoded_len(base: usize) -> usize;
48+
49+
fn to_base_fixed_len(self, base: usize) -> BaseNString {
50+
let mut encoded = self.to_base(base);
51+
encoded.start = encoded.buf.len() - Self::encoded_len(base);
52+
encoded
53+
}
2254

23-
loop {
24-
index -= 1;
25-
s[index] = BASE_64[(n % base) as usize];
26-
n /= base;
55+
fn to_base(self, base: usize) -> BaseNString {
56+
let mut output = [ascii::Char::Digit0; 128];
2757

28-
if n == 0 {
29-
break;
58+
let mut n: u128 = self.into();
59+
60+
let mut index = output.len();
61+
loop {
62+
index -= 1;
63+
output[index] = BASE_64[(n % base as u128) as usize];
64+
n /= base as u128;
65+
66+
if n == 0 {
67+
break;
68+
}
69+
}
70+
assert_eq!(n, 0);
71+
72+
BaseNString { start: index, buf: output }
73+
}
74+
}
75+
76+
impl ToBaseN for u128 {
77+
fn encoded_len(base: usize) -> usize {
78+
let mut max = u128::MAX;
79+
let mut len = 0;
80+
while max > 0 {
81+
len += 1;
82+
max /= base as u128;
3083
}
84+
len
3185
}
86+
}
3287

33-
output.push_str(unsafe {
34-
// SAFETY: `s` is populated using only valid utf8 characters from `BASE_64`
35-
str::from_utf8_unchecked(&s[index..])
36-
});
88+
impl ToBaseN for u64 {
89+
fn encoded_len(base: usize) -> usize {
90+
let mut max = u64::MAX;
91+
let mut len = 0;
92+
while max > 0 {
93+
len += 1;
94+
max /= base as u64;
95+
}
96+
len
97+
}
3798
}
3899

39-
#[inline]
40-
pub fn encode(n: u128, base: usize) -> String {
41-
let mut s = String::new();
42-
push_str(n, base, &mut s);
43-
s
100+
impl ToBaseN for u32 {
101+
fn encoded_len(base: usize) -> usize {
102+
let mut max = u32::MAX;
103+
let mut len = 0;
104+
while max > 0 {
105+
len += 1;
106+
max /= base as u32;
107+
}
108+
len
109+
}
44110
}

compiler/rustc_data_structures/src/base_n/tests.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,17 @@
11
use super::*;
22

33
#[test]
4-
fn test_encode() {
4+
fn limits() {
5+
assert_eq!(Ok(u128::MAX), u128::from_str_radix(&u128::MAX.to_base(36), 36));
6+
assert_eq!(Ok(u64::MAX), u64::from_str_radix(&u64::MAX.to_base(36), 36));
7+
assert_eq!(Ok(u32::MAX), u32::from_str_radix(&u32::MAX.to_base(36), 36));
8+
}
9+
10+
#[test]
11+
fn test_to_base() {
512
fn test(n: u128, base: usize) {
6-
assert_eq!(Ok(n), u128::from_str_radix(&encode(n, base), base as u32));
13+
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base(base), base as u32));
14+
assert_eq!(Ok(n), u128::from_str_radix(&n.to_base_fixed_len(base), base as u32));
715
}
816

917
for base in 2..37 {

compiler/rustc_data_structures/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#![doc(rust_logo)]
1717
#![feature(allocator_api)]
1818
#![feature(array_windows)]
19+
#![feature(ascii_char)]
20+
#![feature(ascii_char_variants)]
1921
#![feature(auto_traits)]
2022
#![feature(cfg_match)]
2123
#![feature(core_intrinsics)]

compiler/rustc_incremental/src/persist/fs.rs

+25-29
Original file line numberDiff line numberDiff line change
@@ -104,10 +104,14 @@
104104
//! implemented.
105105
106106
use crate::errors;
107+
use rustc_data_structures::base_n;
108+
use rustc_data_structures::base_n::BaseNString;
109+
use rustc_data_structures::base_n::ToBaseN;
110+
use rustc_data_structures::base_n::CASE_INSENSITIVE;
111+
use rustc_data_structures::flock;
107112
use rustc_data_structures::fx::{FxHashSet, FxIndexSet};
108113
use rustc_data_structures::svh::Svh;
109114
use rustc_data_structures::unord::{UnordMap, UnordSet};
110-
use rustc_data_structures::{base_n, flock};
111115
use rustc_errors::ErrorGuaranteed;
112116
use rustc_fs_util::{link_or_copy, try_canonicalize, LinkOrCopy};
113117
use rustc_middle::bug;
@@ -333,31 +337,24 @@ pub fn finalize_session_directory(sess: &Session, svh: Option<Svh>) {
333337

334338
debug!("finalize_session_directory() - session directory: {}", incr_comp_session_dir.display());
335339

336-
let old_sub_dir_name = incr_comp_session_dir
340+
let mut sub_dir_name = incr_comp_session_dir
337341
.file_name()
338342
.unwrap()
339343
.to_str()
340-
.expect("malformed session dir name: contains non-Unicode characters");
344+
.expect("malformed session dir name: contains non-Unicode characters")
345+
.to_string();
341346

342-
// Keep the 's-{timestamp}-{random-number}' prefix, but replace the
343-
// '-working' part with the SVH of the crate
344-
let dash_indices: Vec<_> = old_sub_dir_name.match_indices('-').map(|(idx, _)| idx).collect();
345-
if dash_indices.len() != 3 {
346-
bug!(
347-
"Encountered incremental compilation session directory with \
348-
malformed name: {}",
349-
incr_comp_session_dir.display()
350-
)
351-
}
352-
353-
// State: "s-{timestamp}-{random-number}-"
354-
let mut new_sub_dir_name = String::from(&old_sub_dir_name[..=dash_indices[2]]);
347+
// Keep the 's-{timestamp}-{random-number}' prefix, but replace "working" with the SVH of the crate
348+
sub_dir_name.truncate(sub_dir_name.len() - "working".len());
349+
// Double-check that we kept this: "s-{timestamp}-{random-number}-"
350+
assert!(sub_dir_name.ends_with('-'), "{:?}", sub_dir_name);
351+
assert!(sub_dir_name.as_bytes().iter().filter(|b| **b == b'-').count() == 3);
355352

356-
// Append the svh
357-
base_n::push_str(svh.as_u128(), INT_ENCODE_BASE, &mut new_sub_dir_name);
353+
// Append the SVH
354+
sub_dir_name.push_str(&svh.as_u128().to_base_fixed_len(CASE_INSENSITIVE));
358355

359356
// Create the full path
360-
let new_path = incr_comp_session_dir.parent().unwrap().join(new_sub_dir_name);
357+
let new_path = incr_comp_session_dir.parent().unwrap().join(&*sub_dir_name);
361358
debug!("finalize_session_directory() - new path: {}", new_path.display());
362359

363360
match rename_path_with_retry(&*incr_comp_session_dir, &new_path, 3) {
@@ -453,11 +450,11 @@ fn generate_session_dir_path(crate_dir: &Path) -> PathBuf {
453450
let random_number = thread_rng().next_u32();
454451
debug!("generate_session_dir_path: random_number = {}", random_number);
455452

456-
let directory_name = format!(
457-
"s-{}-{}-working",
458-
timestamp,
459-
base_n::encode(random_number as u128, INT_ENCODE_BASE)
460-
);
453+
// Chop the first 3 characters off the timestamp. Those 3 bytes will be zero for a while.
454+
let (zeroes, timestamp) = timestamp.split_at(3);
455+
assert_eq!(zeroes, "000");
456+
let directory_name =
457+
format!("s-{}-{}-working", timestamp, random_number.to_base_fixed_len(CASE_INSENSITIVE));
461458
debug!("generate_session_dir_path: directory_name = {}", directory_name);
462459
let directory_path = crate_dir.join(directory_name);
463460
debug!("generate_session_dir_path: directory_path = {}", directory_path.display());
@@ -588,10 +585,10 @@ fn extract_timestamp_from_session_dir(directory_name: &str) -> Result<SystemTime
588585
string_to_timestamp(&directory_name[dash_indices[0] + 1..dash_indices[1]])
589586
}
590587

591-
fn timestamp_to_string(timestamp: SystemTime) -> String {
588+
fn timestamp_to_string(timestamp: SystemTime) -> BaseNString {
592589
let duration = timestamp.duration_since(UNIX_EPOCH).unwrap();
593590
let micros = duration.as_secs() * 1_000_000 + (duration.subsec_nanos() as u64) / 1000;
594-
base_n::encode(micros as u128, INT_ENCODE_BASE)
591+
micros.to_base_fixed_len(CASE_INSENSITIVE)
595592
}
596593

597594
fn string_to_timestamp(s: &str) -> Result<SystemTime, &'static str> {
@@ -622,9 +619,8 @@ fn crate_path(sess: &Session) -> PathBuf {
622619
sess.cfg_version,
623620
);
624621

625-
let stable_crate_id = base_n::encode(stable_crate_id.as_u64() as u128, INT_ENCODE_BASE);
626-
627-
let crate_name = format!("{crate_name}-{stable_crate_id}");
622+
let crate_name =
623+
format!("{crate_name}-{}", stable_crate_id.as_u64().to_base_fixed_len(CASE_INSENSITIVE));
628624
incr_dir.join(crate_name)
629625
}
630626

compiler/rustc_middle/src/mir/mono.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
22
use crate::ty::{GenericArgs, Instance, InstanceDef, SymbolName, TyCtxt};
33
use rustc_attr::InlineAttr;
4-
use rustc_data_structures::base_n;
4+
use rustc_data_structures::base_n::BaseNString;
5+
use rustc_data_structures::base_n::ToBaseN;
6+
use rustc_data_structures::base_n::CASE_INSENSITIVE;
57
use rustc_data_structures::fingerprint::Fingerprint;
68
use rustc_data_structures::fx::FxHashMap;
79
use rustc_data_structures::fx::FxIndexMap;
@@ -337,14 +339,11 @@ impl<'tcx> CodegenUnit<'tcx> {
337339
self.is_code_coverage_dead_code_cgu = true;
338340
}
339341

340-
pub fn mangle_name(human_readable_name: &str) -> String {
341-
// We generate a 80 bit hash from the name. This should be enough to
342-
// avoid collisions and is still reasonably short for filenames.
342+
pub fn mangle_name(human_readable_name: &str) -> BaseNString {
343343
let mut hasher = StableHasher::new();
344344
human_readable_name.hash(&mut hasher);
345345
let hash: Hash128 = hasher.finish();
346-
let hash = hash.as_u128() & ((1u128 << 80) - 1);
347-
base_n::encode(hash, base_n::CASE_INSENSITIVE)
346+
hash.as_u128().to_base_fixed_len(CASE_INSENSITIVE)
348347
}
349348

350349
pub fn compute_size_estimate(&mut self) {

compiler/rustc_sanitizers/src/cfi/typeid/itanium_cxx_abi/encode.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44
//!
55
//! For more information about LLVM CFI and cross-language LLVM CFI support for the Rust compiler,
66
//! see design document in the tracking issue #89653.
7-
use rustc_data_structures::base_n;
7+
use rustc_data_structures::base_n::ToBaseN;
8+
use rustc_data_structures::base_n::ALPHANUMERIC_ONLY;
9+
use rustc_data_structures::base_n::CASE_INSENSITIVE;
810
use rustc_data_structures::fx::FxHashMap;
911
use rustc_hir as hir;
1012
use rustc_middle::bug;
@@ -736,7 +738,7 @@ fn encode_ty_name(tcx: TyCtxt<'_>, def_id: DefId) -> String {
736738
/// <https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html>).
737739
fn to_disambiguator(num: u64) -> String {
738740
if let Some(num) = num.checked_sub(1) {
739-
format!("s{}_", base_n::encode(num as u128, base_n::ALPHANUMERIC_ONLY))
741+
format!("s{}_", num.to_base(ALPHANUMERIC_ONLY))
740742
} else {
741743
"s_".to_string()
742744
}
@@ -746,7 +748,7 @@ fn to_disambiguator(num: u64) -> String {
746748
/// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangle.seq-id>).
747749
fn to_seq_id(num: usize) -> String {
748750
if let Some(num) = num.checked_sub(1) {
749-
base_n::encode(num as u128, base_n::CASE_INSENSITIVE).to_uppercase()
751+
(num as u64).to_base(CASE_INSENSITIVE).to_uppercase()
750752
} else {
751753
"".to_string()
752754
}

compiler/rustc_symbol_mangling/src/v0.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use rustc_data_structures::base_n;
1+
use rustc_data_structures::base_n::ToBaseN;
22
use rustc_data_structures::fx::FxHashMap;
33
use rustc_data_structures::intern::Interned;
44
use rustc_hir as hir;
@@ -832,7 +832,7 @@ impl<'tcx> Printer<'tcx> for SymbolMangler<'tcx> {
832832
/// e.g. `1` becomes `"0_"`, `62` becomes `"Z_"`, etc.
833833
pub(crate) fn push_integer_62(x: u64, output: &mut String) {
834834
if let Some(x) = x.checked_sub(1) {
835-
base_n::push_str(x as u128, base_n::ALPHANUMERIC_ONLY, output);
835+
output.push_str(&x.to_base(62));
836836
}
837837
output.push('_');
838838
}

0 commit comments

Comments
 (0)