Auto merge of #32779 - michaelwoerister:partitioning, r=nikomatsakis

Add initial version of codegen unit partitioning for incremental compilation. The task of the partitioning module is to take the complete set of translation items of a crate and produce a set of codegen units from it, where a codegen unit is a named set of (translation-item, linkage) pairs. That is, this module decides which translation item appears in which codegen units with which linkage. This version only handles the case of partitioning for incremental compilation, not the regular N-codegen units case. In the future the regular case should be handled too, maybe even doing a bit more analysis to intelligently figure out a good partitioning. One thing that could be improved is the syntax of the codegen unit tests. Right now they still use the compile-fail error specification infrastructure, so everything has to be on one line. Would be nice to be able to format things in a more readable way.
rust-lang · Apr 16, 2016 · fef6c64 · fef6c64
2 parents 576229f + e8441b6
commit fef6c64
Show file tree

Hide file tree

Showing 45 changed files with 1,326 additions and 69 deletions.
diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
@@ -1911,6 +1911,7 @@ fn run_rustdoc_test(config: &Config, props: &TestProps, testpaths: &TestPaths) {
 }
 
 fn run_codegen_units_test(config: &Config, props: &TestProps, testpaths: &TestPaths) {
+
  assert!(props.revisions.is_empty(), "revisions not relevant here");
 
  let proc_res = compile_test(config, props, testpaths);
@@ -1921,36 +1922,148 @@ fn run_codegen_units_test(config: &Config, props: &TestProps, testpaths: &TestPa
 
  check_no_compiler_crash(None, &proc_res);
 
- let prefix = "TRANS_ITEM ";
+ const PREFIX: &'static str = "TRANS_ITEM ";
+ const CGU_MARKER: &'static str = "@@";
 
- let actual: HashSet<String> = proc_res
+ let actual: Vec<TransItem> = proc_res
  .stdout
  .lines()
- .filter(|line| line.starts_with(prefix))
- .map(|s| (&s[prefix.len()..]).to_string())
+ .filter(|line| line.starts_with(PREFIX))
+ .map(str_to_trans_item)
  .collect();
 
- let expected: HashSet<String> = errors::load_errors(&testpaths.file, None)
+ let expected: Vec<TransItem> = errors::load_errors(&testpaths.file, None)
  .iter()
- .map(|e| e.msg.trim().to_string())
+ .map(|e| str_to_trans_item(&e.msg[..]))
  .collect();
 
- if actual != expected {
- let mut missing: Vec<_> = expected.difference(&actual).collect();
+ let mut missing = Vec::new();
+ let mut wrong_cgus = Vec::new();
+
+ for expected_item in &expected {
+ let actual_item_with_same_name = actual.iter()
+ .find(|ti| ti.name == expected_item.name);
+
+ if let Some(actual_item) = actual_item_with_same_name {
+ if !expected_item.codegen_units.is_empty() {
+ // Also check for codegen units
+ if expected_item.codegen_units != actual_item.codegen_units {
+ wrong_cgus.push((expected_item.clone(), actual_item.clone()));
+ }
+ }
+ } else {
+ missing.push(expected_item.string.clone());
+ }
+ }
+
+ let unexpected: Vec<_> =
+ actual.iter()
+ .filter(|acgu| !expected.iter().any(|ecgu| acgu.name == ecgu.name))
+ .map(|acgu| acgu.string.clone())
+ .collect();
+
+ if !missing.is_empty() {
  missing.sort();
 
- let mut too_much: Vec<_> = actual.difference(&expected).collect();
- too_much.sort();
+ println!("\nThese items should have been contained but were not:\n");
+
+ for item in &missing {
+ println!("{}", item);
+ }
 
- println!("Expected and actual sets of codegen-items differ.\n\
- These items should have been contained but were not:\n\n\
- {}\n\n\
- These items were contained but should not have been:\n\n\
- {}\n\n",
- missing.iter().fold("".to_string(), |s1, s2| s1 + "\n" + s2),
- too_much.iter().fold("".to_string(), |s1, s2| s1 + "\n" + s2));
+ println!("\n");
+ }
+
+ if !unexpected.is_empty() {
+ let sorted = {
+ let mut sorted = unexpected.clone();
+ sorted.sort();
+ sorted
+ };
+
+ println!("\nThese items were contained but should not have been:\n");
+
+ for item in sorted {
+ println!("{}", item);
+ }
+
+ println!("\n");
+ }
+
+ if !wrong_cgus.is_empty() {
+ wrong_cgus.sort_by_key(|pair| pair.0.name.clone());
+ println!("\nThe following items were assigned to wrong codegen units:\n");
+
+ for &(ref expected_item, ref actual_item) in &wrong_cgus {
+ println!("{}", expected_item.name);
+ println!(" expected: {}", codegen_units_to_str(&expected_item.codegen_units));
+ println!(" actual: {}", codegen_units_to_str(&actual_item.codegen_units));
+ println!("");
+ }
+ }
+
+ if !(missing.is_empty() && unexpected.is_empty() && wrong_cgus.is_empty())
+ {
  panic!();
  }
+
+ #[derive(Clone, Eq, PartialEq)]
+ struct TransItem {
+ name: String,
+ codegen_units: HashSet<String>,
+ string: String,
+ }
+
+ // [TRANS_ITEM] name [@@ (cgu)+]
+ fn str_to_trans_item(s: &str) -> TransItem {
+ let s = if s.starts_with(PREFIX) {
+ (&s[PREFIX.len()..]).trim()
+ } else {
+ s.trim()
+ };
+
+ let full_string = format!("{}{}", PREFIX, s.trim().to_owned());
+
+ let parts: Vec<&str> = s.split(CGU_MARKER)
+ .map(str::trim)
+ .filter(|s| !s.is_empty())
+ .collect();
+
+ let name = parts[0].trim();
+
+ let cgus = if parts.len() > 1 {
+ let cgus_str = parts[1];
+
+ cgus_str.split(" ")
+ .map(str::trim)
+ .filter(|s| !s.is_empty())
+ .map(str::to_owned)
+ .collect()
+ }
+ else {
+ HashSet::new()
+ };
+
+ TransItem {
+ name: name.to_owned(),
+ codegen_units: cgus,
+ string: full_string,
+ }
+ }
+
+ fn codegen_units_to_str(cgus: &HashSet<String>) -> String
+ {
+ let mut cgus: Vec<_> = cgus.iter().collect();
+ cgus.sort();
+
+ let mut string = String::new();
+ for cgu in cgus {
+ string.push_str(&cgu[..]);
+ string.push_str(" ");
+ }
+
+ string
+ }
 }
 
 fn run_incremental_test(config: &Config, props: &TestProps, testpaths: &TestPaths) {

diff --git a/src/librustc/hir/map/collector.rs b/src/librustc/hir/map/collector.rs
@@ -136,9 +136,10 @@ impl<'ast> Visitor<'ast> for NodeCollector<'ast> {
  ItemDefaultImpl(..) | ItemImpl(..) =>
  DefPathData::Impl,
  ItemEnum(..) | ItemStruct(..) | ItemTrait(..) |
- ItemExternCrate(..) | ItemMod(..) | ItemForeignMod(..) |
- ItemTy(..) =>
+ ItemExternCrate(..) | ItemForeignMod(..) | ItemTy(..) =>
  DefPathData::TypeNs(i.name),
+ ItemMod(..) =>
+ DefPathData::Module(i.name),
  ItemStatic(..) | ItemConst(..) | ItemFn(..) =>
  DefPathData::ValueNs(i.name),
  ItemUse(..) =>

diff --git a/src/librustc/hir/map/definitions.rs b/src/librustc/hir/map/definitions.rs
@@ -147,6 +147,7 @@ pub enum DefPathData {
  Impl,
  TypeNs(ast::Name), // something in the type NS
  ValueNs(ast::Name), // something in the value NS
+ Module(ast::Name),
  MacroDef(ast::Name),
  ClosureExpr,
 
@@ -288,6 +289,7 @@ impl DefPathData {
  match *self {
  TypeNs(name) |
  ValueNs(name) |
+ Module(name) |
  MacroDef(name) |
  TypeParam(name) |
  LifetimeDef(name) |

diff --git a/src/librustc/ty/item_path.rs b/src/librustc/ty/item_path.rs
@@ -147,6 +147,7 @@ impl<'tcx> TyCtxt<'tcx> {
  data @ DefPathData::Misc |
  data @ DefPathData::TypeNs(..) |
  data @ DefPathData::ValueNs(..) |
+ data @ DefPathData::Module(..) |
  data @ DefPathData::TypeParam(..) |
  data @ DefPathData::LifetimeDef(..) |
  data @ DefPathData::EnumVariant(..) |
@@ -189,7 +190,7 @@ impl<'tcx> TyCtxt<'tcx> {
  // the impl is either in the same module as the self-type or
  // as the trait.
  let self_ty = self.lookup_item_type(impl_def_id).ty;
- let in_self_mod = match self.characteristic_def_id_of_type(self_ty) {
+ let in_self_mod = match characteristic_def_id_of_type(self_ty) {
  None => false,
  Some(ty_def_id) => self.parent_def_id(ty_def_id) == Some(parent_def_id),
  };
@@ -268,38 +269,6 @@ impl<'tcx> TyCtxt<'tcx> {
  buffer.push(&format!("<impl at {}>", span_str));
  }
 
- /// As a heuristic, when we see an impl, if we see that the
- /// 'self-type' is a type defined in the same module as the impl,
- /// we can omit including the path to the impl itself. This
- /// function tries to find a "characteristic def-id" for a
- /// type. It's just a heuristic so it makes some questionable
- /// decisions and we may want to adjust it later.
- fn characteristic_def_id_of_type(&self, ty: Ty<'tcx>) -> Option<DefId> {
- match ty.sty {
- ty::TyStruct(adt_def, _) |
- ty::TyEnum(adt_def, _) =>
- Some(adt_def.did),
-
- ty::TyTrait(ref data) =>
- Some(data.principal_def_id()),
-
- ty::TyBox(subty) =>
- self.characteristic_def_id_of_type(subty),
-
- ty::TyRawPtr(mt) |
- ty::TyRef(_, mt) =>
- self.characteristic_def_id_of_type(mt.ty),
-
- ty::TyTuple(ref tys) =>
- tys.iter()
- .filter_map(|ty| self.characteristic_def_id_of_type(ty))
- .next(),
-
- _ =>
- None
- }
- }
-
  /// Returns the def-id of `def_id`'s parent in the def tree. If
  /// this returns `None`, then `def_id` represents a crate root or
  /// inlined root.
@@ -309,6 +278,47 @@ impl<'tcx> TyCtxt<'tcx> {
  }
 }
 
+/// As a heuristic, when we see an impl, if we see that the
+/// 'self-type' is a type defined in the same module as the impl,
+/// we can omit including the path to the impl itself. This
+/// function tries to find a "characteristic def-id" for a
+/// type. It's just a heuristic so it makes some questionable
+/// decisions and we may want to adjust it later.
+pub fn characteristic_def_id_of_type<'tcx>(ty: Ty<'tcx>) -> Option<DefId> {
+ match ty.sty {
+ ty::TyStruct(adt_def, _) |
+ ty::TyEnum(adt_def, _) => Some(adt_def.did),
+
+ ty::TyTrait(ref data) => Some(data.principal_def_id()),
+
+ ty::TyArray(subty, _) |
+ ty::TySlice(subty) |
+ ty::TyBox(subty) => characteristic_def_id_of_type(subty),
+
+ ty::TyRawPtr(mt) |
+ ty::TyRef(_, mt) => characteristic_def_id_of_type(mt.ty),
+
+ ty::TyTuple(ref tys) => tys.iter()
+ .filter_map(|ty| characteristic_def_id_of_type(ty))
+ .next(),
+
+ ty::TyFnDef(def_id, _, _) |
+ ty::TyClosure(def_id, _) => Some(def_id),
+
+ ty::TyBool |
+ ty::TyChar |
+ ty::TyInt(_) |
+ ty::TyUint(_) |
+ ty::TyStr |
+ ty::TyFnPtr(_) |
+ ty::TyProjection(_) |
+ ty::TyParam(_) |
+ ty::TyInfer(_) |
+ ty::TyError |
+ ty::TyFloat(_) => None,
+ }
+}
+
 /// Unifying Trait for different kinds of item paths we might
 /// construct. The basic interface is that components get pushed: the
 /// instance can also customize how we handle the root of a crate.

diff --git a/src/librustc_llvm/lib.rs b/src/librustc_llvm/lib.rs
@@ -97,7 +97,7 @@ pub enum Visibility {
 // DLLExportLinkage, GhostLinkage and LinkOnceODRAutoHideLinkage.
 // LinkerPrivateLinkage and LinkerPrivateWeakLinkage are not included either;
 // they've been removed in upstream LLVM commit r203866.
-#[derive(Copy, Clone)]
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
 pub enum Linkage {
  ExternalLinkage = 0,
  AvailableExternallyLinkage = 1,

diff --git a/src/librustc_metadata/def_key.rs b/src/librustc_metadata/def_key.rs
@@ -31,6 +31,7 @@ pub enum DefPathData {
  Impl,
  TypeNs,
  ValueNs,
+ Module,
  MacroDef,
  ClosureExpr,
  TypeParam,
@@ -61,6 +62,7 @@ fn simplify_def_path_data(data: hir_map::DefPathData) -> DefPathData {
  hir_map::DefPathData::Impl => DefPathData::Impl,
  hir_map::DefPathData::TypeNs(_) => DefPathData::TypeNs,
  hir_map::DefPathData::ValueNs(_) => DefPathData::ValueNs,
+ hir_map::DefPathData::Module(_) => DefPathData::Module,
  hir_map::DefPathData::MacroDef(_) => DefPathData::MacroDef,
  hir_map::DefPathData::ClosureExpr => DefPathData::ClosureExpr,
  hir_map::DefPathData::TypeParam(_) => DefPathData::TypeParam,
@@ -91,6 +93,7 @@ fn recover_def_path_data(data: DefPathData, name: Option<Name>) -> hir_map::DefP
  DefPathData::Impl => hir_map::DefPathData::Impl,
  DefPathData::TypeNs => hir_map::DefPathData::TypeNs(name.unwrap()),
  DefPathData::ValueNs => hir_map::DefPathData::ValueNs(name.unwrap()),
+ DefPathData::Module => hir_map::DefPathData::Module(name.unwrap()),
  DefPathData::MacroDef => hir_map::DefPathData::MacroDef(name.unwrap()),
  DefPathData::ClosureExpr => hir_map::DefPathData::ClosureExpr,
  DefPathData::TypeParam => hir_map::DefPathData::TypeParam(name.unwrap()),