@@ -113,6 +113,7 @@ use rustc_middle::query::Providers;
113
113
use rustc_middle:: ty:: print:: { characteristic_def_id_of_type, with_no_trimmed_paths} ;
114
114
use rustc_middle:: ty:: { self , visit:: TypeVisitableExt , InstanceDef , TyCtxt } ;
115
115
use rustc_session:: config:: { DumpMonoStatsFormat , SwitchWithOptPath } ;
116
+ use rustc_session:: CodegenUnits ;
116
117
use rustc_span:: symbol:: Symbol ;
117
118
118
119
use crate :: collector:: UsageMap ;
@@ -121,7 +122,6 @@ use crate::errors::{CouldntDumpMonoStats, SymbolAlreadyDefined, UnknownCguCollec
121
122
122
123
/// Context shared by the partitioning steps: `partition` builds one and
/// passes it by reference to `place_root_mono_items` and
/// `merge_codegen_units`.
struct PartitioningCx < ' a , ' tcx > {
123
124
/// The type context; the steps read it as `cx.tcx` (e.g. to reach the session).
tcx : TyCtxt < ' tcx > ,
124
- target_cgu_count : usize ,
125
125
/// Borrowed `UsageMap` (imported from `crate::collector`).
// NOTE(review): presumably records which mono items use which — confirm against the collector.
usage_map : & ' a UsageMap < ' tcx > ,
126
126
}
127
127
@@ -130,26 +130,30 @@ struct PlacedRootMonoItems<'tcx> {
130
130
codegen_units : Vec < CodegenUnit < ' tcx > > ,
131
131
132
132
internalization_candidates : FxHashSet < MonoItem < ' tcx > > ,
133
+
134
+ /// These must be obtained when the iterator in `partition` runs. They
135
+ /// can't be obtained later because some inlined functions might not be
136
+ /// reachable.
137
+ unique_inlined_stats : ( usize , usize ) ,
133
138
}
134
139
135
140
// The output CGUs are sorted by name.
136
141
fn partition < ' tcx , I > (
137
142
tcx : TyCtxt < ' tcx > ,
138
143
mono_items : I ,
139
- max_cgu_count : usize ,
140
144
usage_map : & UsageMap < ' tcx > ,
141
145
) -> Vec < CodegenUnit < ' tcx > >
142
146
where
143
147
I : Iterator < Item = MonoItem < ' tcx > > ,
144
148
{
145
149
let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning" ) ;
146
150
147
- let cx = & PartitioningCx { tcx, target_cgu_count : max_cgu_count , usage_map } ;
151
+ let cx = & PartitioningCx { tcx, usage_map } ;
148
152
149
153
// In the first step, we place all regular monomorphizations into their
150
154
// respective 'home' codegen unit. Regular monomorphizations are all
151
155
// functions and statics defined in the local crate.
152
- let PlacedRootMonoItems { mut codegen_units, internalization_candidates } = {
156
+ let PlacedRootMonoItems { mut codegen_units, internalization_candidates, unique_inlined_stats } = {
153
157
let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_place_roots" ) ;
154
158
place_root_mono_items ( cx, mono_items)
155
159
} ;
@@ -158,15 +162,15 @@ where
158
162
cgu. create_size_estimate ( tcx) ;
159
163
}
160
164
161
- debug_dump ( tcx, "INITIAL PARTITIONING " , & codegen_units) ;
165
+ debug_dump ( tcx, "ROOTS " , & codegen_units, unique_inlined_stats ) ;
162
166
163
167
// Merge until we have at most `tcx.sess.codegen_units()` codegen units.
164
168
// `merge_codegen_units` is responsible for updating the CGU size
165
169
// estimates.
166
170
{
167
171
let _prof_timer = tcx. prof . generic_activity ( "cgu_partitioning_merge_cgus" ) ;
168
172
merge_codegen_units ( cx, & mut codegen_units) ;
169
- debug_dump ( tcx, "POST MERGING " , & codegen_units) ;
173
+ debug_dump ( tcx, "MERGE " , & codegen_units, unique_inlined_stats ) ;
170
174
}
171
175
172
176
// In the next step, we use the inlining map to determine which additional
@@ -182,7 +186,7 @@ where
182
186
cgu. create_size_estimate ( tcx) ;
183
187
}
184
188
185
- debug_dump ( tcx, "POST INLINING " , & codegen_units) ;
189
+ debug_dump ( tcx, "INLINE " , & codegen_units, unique_inlined_stats ) ;
186
190
187
191
// Next we try to make as many symbols "internal" as possible, so LLVM has
188
192
// more freedom to optimize.
@@ -226,7 +230,7 @@ where
226
230
// Ensure CGUs are sorted by name, so that we get deterministic results.
227
231
assert ! ( codegen_units. is_sorted_by( |a, b| Some ( a. name( ) . as_str( ) . cmp( b. name( ) . as_str( ) ) ) ) ) ;
228
232
229
- debug_dump ( tcx, "FINAL" , & codegen_units) ;
233
+ debug_dump ( tcx, "FINAL" , & codegen_units, unique_inlined_stats ) ;
230
234
231
235
codegen_units
232
236
}
@@ -252,10 +256,16 @@ where
252
256
let cgu_name_builder = & mut CodegenUnitNameBuilder :: new ( cx. tcx ) ;
253
257
let cgu_name_cache = & mut FxHashMap :: default ( ) ;
254
258
259
+ let mut num_unique_inlined_items = 0 ;
260
+ let mut unique_inlined_items_size = 0 ;
255
261
for mono_item in mono_items {
256
262
match mono_item. instantiation_mode ( cx. tcx ) {
257
263
InstantiationMode :: GloballyShared { .. } => { }
258
- InstantiationMode :: LocalCopy => continue ,
264
+ InstantiationMode :: LocalCopy => {
265
+ num_unique_inlined_items += 1 ;
266
+ unique_inlined_items_size += mono_item. size_estimate ( cx. tcx ) ;
267
+ continue ;
268
+ }
259
269
}
260
270
261
271
let characteristic_def_id = characteristic_def_id_of_mono_item ( cx. tcx , mono_item) ;
@@ -300,7 +310,11 @@ where
300
310
let mut codegen_units: Vec < _ > = codegen_units. into_values ( ) . collect ( ) ;
301
311
codegen_units. sort_by ( |a, b| a. name ( ) . as_str ( ) . cmp ( b. name ( ) . as_str ( ) ) ) ;
302
312
303
- PlacedRootMonoItems { codegen_units, internalization_candidates }
313
+ PlacedRootMonoItems {
314
+ codegen_units,
315
+ internalization_candidates,
316
+ unique_inlined_stats : ( num_unique_inlined_items, unique_inlined_items_size) ,
317
+ }
304
318
}
305
319
306
320
// This function requires the CGUs to be sorted by name on input, and ensures
@@ -309,7 +323,7 @@ fn merge_codegen_units<'tcx>(
309
323
cx : & PartitioningCx < ' _ , ' tcx > ,
310
324
codegen_units : & mut Vec < CodegenUnit < ' tcx > > ,
311
325
) {
312
- assert ! ( cx. target_cgu_count >= 1 ) ;
326
+ assert ! ( cx. tcx . sess . codegen_units ( ) . as_usize ( ) >= 1 ) ;
313
327
314
328
// A sorted order here ensures merging is deterministic.
315
329
assert ! ( codegen_units. is_sorted_by( |a, b| Some ( a. name( ) . as_str( ) . cmp( b. name( ) . as_str( ) ) ) ) ) ;
@@ -318,11 +332,32 @@ fn merge_codegen_units<'tcx>(
318
332
let mut cgu_contents: FxHashMap < Symbol , Vec < Symbol > > =
319
333
codegen_units. iter ( ) . map ( |cgu| ( cgu. name ( ) , vec ! [ cgu. name( ) ] ) ) . collect ( ) ;
320
334
321
- // Merge the two smallest codegen units until the target size is
322
- // reached.
323
- while codegen_units. len ( ) > cx. target_cgu_count {
324
- // Sort small cgus to the back
335
+ // Having multiple CGUs can drastically speed up compilation. But for
336
+ // non-incremental builds, tiny CGUs slow down compilation *and* result in
337
+ // worse generated code. So we don't allow CGUs smaller than this (unless
338
+ // there is just one CGU, of course). Note that CGU sizes of 100,000+ are
339
+ // common in larger programs, so this isn't all that large.
340
+ const NON_INCR_MIN_CGU_SIZE : usize = 1000 ;
341
+
342
+ // Repeatedly merge the two smallest codegen units as long as:
343
+ // - we have more CGUs than the upper limit, or
344
+ // - (Non-incremental builds only) the user didn't specify a CGU count, and
345
+ // there are multiple CGUs, and some are below the minimum size.
346
+ //
347
+ // The "didn't specify a CGU count" condition is because when an explicit
348
+ // count is requested we observe it as closely as possible. For example,
349
+ // the `compiler_builtins` crate sets `codegen-units = 10000` and it's
350
+ // critical they aren't merged. Also, some tests use explicit small values
351
+ // and likewise won't work if small CGUs are merged.
352
+ while codegen_units. len ( ) > cx. tcx . sess . codegen_units ( ) . as_usize ( )
353
+ || ( cx. tcx . sess . opts . incremental . is_none ( )
354
+ && matches ! ( cx. tcx. sess. codegen_units( ) , CodegenUnits :: Default ( _) )
355
+ && codegen_units. len ( ) > 1
356
+ && codegen_units. iter ( ) . any ( |cgu| cgu. size_estimate ( ) < NON_INCR_MIN_CGU_SIZE ) )
357
+ {
358
+ // Sort small cgus to the back.
325
359
codegen_units. sort_by_cached_key ( |cgu| cmp:: Reverse ( cgu. size_estimate ( ) ) ) ;
360
+
326
361
let mut smallest = codegen_units. pop ( ) . unwrap ( ) ;
327
362
let second_smallest = codegen_units. last_mut ( ) . unwrap ( ) ;
328
363
@@ -814,47 +849,147 @@ fn default_visibility(tcx: TyCtxt<'_>, id: DefId, is_generic: bool) -> Visibilit
814
849
}
815
850
}
816
851
817
- fn debug_dump < ' a , ' tcx : ' a > ( tcx : TyCtxt < ' tcx > , label : & str , cgus : & [ CodegenUnit < ' tcx > ] ) {
852
+ fn debug_dump < ' a , ' tcx : ' a > (
853
+ tcx : TyCtxt < ' tcx > ,
854
+ label : & str ,
855
+ cgus : & [ CodegenUnit < ' tcx > ] ,
856
+ ( unique_inlined_items, unique_inlined_size) : ( usize , usize ) ,
857
+ ) {
818
858
let dump = move || {
819
859
use std:: fmt:: Write ;
820
860
821
- let num_cgus = cgus. len ( ) ;
822
- let num_items: usize = cgus. iter ( ) . map ( |cgu| cgu. items ( ) . len ( ) ) . sum ( ) ;
823
- let total_size: usize = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . sum ( ) ;
824
- let max_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . max ( ) . unwrap ( ) ;
825
- let min_size = cgus. iter ( ) . map ( |cgu| cgu. size_estimate ( ) ) . min ( ) . unwrap ( ) ;
826
- let max_min_size_ratio = max_size as f64 / min_size as f64 ;
861
+ let mut num_cgus = 0 ;
862
+ let mut all_cgu_sizes = Vec :: new ( ) ;
863
+
864
+ // Note: every unique root item is placed exactly once, so the number
865
+ // of unique root items always equals the number of placed root items.
866
+
867
+ let mut root_items = 0 ;
868
+ // unique_inlined_items is passed in above.
869
+ let mut placed_inlined_items = 0 ;
870
+
871
+ let mut root_size = 0 ;
872
+ // unique_inlined_size is passed in above.
873
+ let mut placed_inlined_size = 0 ;
874
+
875
+ for cgu in cgus. iter ( ) {
876
+ num_cgus += 1 ;
877
+ all_cgu_sizes. push ( cgu. size_estimate ( ) ) ;
878
+
879
+ for ( item, _) in cgu. items ( ) {
880
+ match item. instantiation_mode ( tcx) {
881
+ InstantiationMode :: GloballyShared { .. } => {
882
+ root_items += 1 ;
883
+ root_size += item. size_estimate ( tcx) ;
884
+ }
885
+ InstantiationMode :: LocalCopy => {
886
+ placed_inlined_items += 1 ;
887
+ placed_inlined_size += item. size_estimate ( tcx) ;
888
+ }
889
+ }
890
+ }
891
+ }
892
+
893
+ all_cgu_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
894
+
895
+ let unique_items = root_items + unique_inlined_items;
896
+ let placed_items = root_items + placed_inlined_items;
897
+ let items_ratio = placed_items as f64 / unique_items as f64 ;
898
+
899
+ let unique_size = root_size + unique_inlined_size;
900
+ let placed_size = root_size + placed_inlined_size;
901
+ let size_ratio = placed_size as f64 / unique_size as f64 ;
902
+
903
+ let mean_cgu_size = placed_size as f64 / num_cgus as f64 ;
904
+
905
+ assert_eq ! ( placed_size, all_cgu_sizes. iter( ) . sum:: <usize >( ) ) ;
827
906
828
907
let s = & mut String :: new ( ) ;
908
+ let _ = writeln ! ( s, "{label}" ) ;
829
909
let _ = writeln ! (
830
910
s,
831
- "{label} ({num_items} items, total_size={total_size}; {num_cgus} CGUs, \
832
- max_size={max_size}, min_size={min_size}, max_size/min_size={max_min_size_ratio:.1}):"
911
+ "- unique items: {unique_items} ({root_items} root + {unique_inlined_items} inlined), \
912
+ unique size: {unique_size} ({root_size} root + {unique_inlined_size} inlined)\n \
913
+ - placed items: {placed_items} ({root_items} root + {placed_inlined_items} inlined), \
914
+ placed size: {placed_size} ({root_size} root + {placed_inlined_size} inlined)\n \
915
+ - placed/unique items ratio: {items_ratio:.2}, \
916
+ placed/unique size ratio: {size_ratio:.2}\n \
917
+ - CGUs: {num_cgus}, mean size: {mean_cgu_size:.1}, sizes: {}",
918
+ list( & all_cgu_sizes) ,
833
919
) ;
920
+ let _ = writeln ! ( s) ;
921
+
834
922
for ( i, cgu) in cgus. iter ( ) . enumerate ( ) {
923
+ let name = cgu. name ( ) ;
924
+ let size = cgu. size_estimate ( ) ;
835
925
let num_items = cgu. items ( ) . len ( ) ;
836
- let _ = writeln ! (
837
- s,
838
- "- CGU[{i}] {} ({num_items} items, size={}):" ,
839
- cgu. name( ) ,
840
- cgu. size_estimate( )
841
- ) ;
926
+ let mean_size = size as f64 / num_items as f64 ;
927
+
928
+ let mut placed_item_sizes: Vec < _ > =
929
+ cgu. items ( ) . iter ( ) . map ( |( item, _) | item. size_estimate ( tcx) ) . collect ( ) ;
930
+ placed_item_sizes. sort_unstable_by_key ( |& n| cmp:: Reverse ( n) ) ;
931
+ let sizes = list ( & placed_item_sizes) ;
932
+
933
+ let _ = writeln ! ( s, "- CGU[{i}]" ) ;
934
+ let _ = writeln ! ( s, " - {name}, size: {size}" ) ;
935
+ let _ =
936
+ writeln ! ( s, " - items: {num_items}, mean size: {mean_size:.1}, sizes: {sizes}" , ) ;
842
937
843
938
for ( item, linkage) in cgu. items_in_deterministic_order ( tcx) {
844
939
let symbol_name = item. symbol_name ( tcx) . name ;
845
940
let symbol_hash_start = symbol_name. rfind ( 'h' ) ;
846
941
let symbol_hash = symbol_hash_start. map_or ( "<no hash>" , |i| & symbol_name[ i..] ) ;
847
942
let size = item. size_estimate ( tcx) ;
943
+ let kind = match item. instantiation_mode ( tcx) {
944
+ InstantiationMode :: GloballyShared { .. } => "root" ,
945
+ InstantiationMode :: LocalCopy => "inlined" ,
946
+ } ;
848
947
let _ = with_no_trimmed_paths ! ( writeln!(
849
948
s,
850
- " - {item} [{linkage:?}] [{symbol_hash}] (size= {size})"
949
+ " - {item} [{linkage:?}] [{symbol_hash}] ({kind}, size: {size})"
851
950
) ) ;
852
951
}
853
952
854
953
let _ = writeln ! ( s) ;
855
954
}
856
955
857
- std:: mem:: take ( s)
956
+ return std:: mem:: take ( s) ;
957
+
958
// Converts a slice to a string, capturing repetitions to save space.
// E.g. `[4, 4, 4, 3, 2, 1, 1, 1, 1, 1]` -> "[4 (x3), 3, 2, 1 (x5)]".
//
// Only *adjacent* equal values are grouped into a run, so a caller that
// wants one entry per distinct value must pass a sorted slice. An empty
// slice yields "[]".
fn list(ns: &[usize]) -> String {
    // Peel off the first element up front: this handles the empty case
    // before anything is allocated and seeds the first run.
    let Some((&first, rest)) = ns.split_first() else {
        return "[]".to_string();
    };

    // Formats one run: a lone value prints bare, a repeated value gets a
    // ` (xN)` suffix.
    let fmt_run = |value: usize, count: usize| {
        if count == 1 { value.to_string() } else { format!("{value} (x{count})") }
    };

    let mut runs = Vec::new();
    let (mut curr, mut curr_count) = (first, 1);

    for &n in rest {
        if n == curr {
            curr_count += 1;
        } else {
            runs.push(fmt_run(curr, curr_count));
            curr = n;
            curr_count = 1;
        }
    }
    // The loop only emits a run when the value changes, so the final run
    // still has to be flushed here.
    runs.push(fmt_run(curr, curr_count));

    format!("[{}]", runs.join(", "))
}
858
993
} ;
859
994
860
995
debug ! ( "{}" , dump( ) ) ;
@@ -922,8 +1057,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
922
1057
let ( codegen_units, _) = tcx. sess . time ( "partition_and_assert_distinct_symbols" , || {
923
1058
sync:: join (
924
1059
|| {
925
- let mut codegen_units =
926
- partition ( tcx, items. iter ( ) . copied ( ) , tcx. sess . codegen_units ( ) , & usage_map) ;
1060
+ let mut codegen_units = partition ( tcx, items. iter ( ) . copied ( ) , & usage_map) ;
927
1061
codegen_units[ 0 ] . make_primary ( ) ;
928
1062
& * tcx. arena . alloc_from_iter ( codegen_units)
929
1063
} ,
0 commit comments