From c9fbaab4533f109ce4e7b56ab8d3df0a49c46ffb Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Feb 2025 14:16:22 +1100 Subject: [PATCH 1/3] Reflow `MirPhase` comments. Currently many of them exceed 100 chars, which makes them painful to read on a terminal that is 100 chars wide. --- compiler/rustc_middle/src/mir/syntax.rs | 47 +++++++++++++------------ 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index 389792a04f8a0..69c19ae7b9c2a 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -53,7 +53,8 @@ pub enum MirPhase { /// sequences of statements that would generally be subject to constant promotion are /// semantically constants, while in analysis MIR all constants are explicit. /// - /// The result of const promotion is available from the `mir_promoted` and `promoted_mir` queries. + /// The result of const promotion is available from the `mir_promoted` and `promoted_mir` + /// queries. /// /// This is the version of MIR used by borrowck and friends. Analysis(AnalysisPhase), @@ -61,26 +62,27 @@ pub enum MirPhase { /// /// The semantic changes that occur in the lowering from analysis to runtime MIR are as follows: /// - /// - Drops: In analysis MIR, `Drop` terminators represent *conditional* drops; roughly speaking, - /// if dataflow analysis determines that the place being dropped is uninitialized, the drop will - /// not be executed. The exact semantics of this aren't written down anywhere, which means they - /// are essentially "what drop elaboration does." In runtime MIR, the drops are unconditional; - /// when a `Drop` terminator is reached, if the type has drop glue that drop glue is always - /// executed. This may be UB if the underlying place is not initialized. - /// - Packed drops: Places might in general be misaligned - in most cases this is UB, the exception - /// is fields of packed structs. In analysis MIR, `Drop(P)` for a `P` that might be misaligned - /// for this reason implicitly moves `P` to a temporary before dropping. Runtime MIR has no such - /// rules, and dropping a misaligned place is simply UB. - /// - Unwinding: in analysis MIR, unwinding from a function which may not unwind aborts. In runtime - /// MIR, this is UB. - /// - Retags: If `-Zmir-emit-retag` is enabled, analysis MIR has "implicit" retags in the same way - /// that Rust itself has them. Where exactly these are is generally subject to change, and so we - /// don't document this here. Runtime MIR has most retags explicit (though implicit retags - /// can still occur at `Rvalue::{Ref,AddrOf}`). - /// - Coroutine bodies: In analysis MIR, locals may actually be behind a pointer that user code has - /// access to. This occurs in coroutine bodies. Such locals do not behave like other locals, - /// because they eg may be aliased in surprising ways. Runtime MIR has no such special locals - - /// all coroutine bodies are lowered and so all places that look like locals really are locals. + /// - Drops: In analysis MIR, `Drop` terminators represent *conditional* drops; roughly + /// speaking, if dataflow analysis determines that the place being dropped is uninitialized, + /// the drop will not be executed. The exact semantics of this aren't written down anywhere, + /// which means they are essentially "what drop elaboration does." In runtime MIR, the drops + /// are unconditional; when a `Drop` terminator is reached, if the type has drop glue that + /// drop glue is always executed. This may be UB if the underlying place is not initialized. + /// - Packed drops: Places might in general be misaligned - in most cases this is UB, the + /// exception is fields of packed structs. In analysis MIR, `Drop(P)` for a `P` that might be + /// misaligned for this reason implicitly moves `P` to a temporary before dropping. Runtime + /// MIR has no such rules, and dropping a misaligned place is simply UB. + /// - Unwinding: in analysis MIR, unwinding from a function which may not unwind aborts. In + /// runtime MIR, this is UB. + /// - Retags: If `-Zmir-emit-retag` is enabled, analysis MIR has "implicit" retags in the same + /// way that Rust itself has them. Where exactly these are is generally subject to change, + /// and so we don't document this here. Runtime MIR has most retags explicit (though implicit + /// retags can still occur at `Rvalue::{Ref,AddrOf}`). + /// - Coroutine bodies: In analysis MIR, locals may actually be behind a pointer that user code + /// has access to. This occurs in coroutine bodies. Such locals do not behave like other + /// locals, because they eg may be aliased in surprising ways. Runtime MIR has no such + /// special locals. All coroutine bodies are lowered and so all places that look like locals + /// really are locals. /// /// Also note that the lint pass which reports eg `200_u8 + 200_u8` as an error is run as a part /// of analysis to runtime MIR lowering. To ensure lints are reported reliably, this means that @@ -111,7 +113,8 @@ pub enum AnalysisPhase { /// * [`TerminatorKind::FalseEdge`] /// * [`StatementKind::FakeRead`] /// * [`StatementKind::AscribeUserType`] - /// * [`StatementKind::Coverage`] with [`CoverageKind::BlockMarker`] or [`CoverageKind::SpanMarker`] + /// * [`StatementKind::Coverage`] with [`CoverageKind::BlockMarker`] or + /// [`CoverageKind::SpanMarker`] /// * [`Rvalue::Ref`] with `BorrowKind::Fake` /// * [`CastKind::PointerCoercion`] with any of the following: /// * [`PointerCoercion::ArrayToPointer`] From c039533656d344b465cf95da721197a6fdd3d7f7 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Feb 2025 14:33:59 +1100 Subject: [PATCH 2/3] Improve MIR phase comments. I found the dialect/phase distinction quite confusing when I first read these comments. This commit clarifies things a bit. --- compiler/rustc_middle/src/mir/syntax.rs | 51 ++++++++++++++----------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index 69c19ae7b9c2a..b63c4d172f2e7 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -23,44 +23,49 @@ use crate::ty::{self, GenericArgsRef, List, Region, Ty, UserTypeAnnotationIndex} /// Represents the "flavors" of MIR. /// -/// All flavors of MIR use the same data structure, but there are some important differences. These -/// differences come in two forms: Dialects and phases. +/// The MIR pipeline is structured into a few major dialects, with one or more phases within each +/// dialect. A MIR flavor is identified by a dialect-phase pair. A single `MirPhase` value +/// specifies such a pair. All flavors of MIR use the same data structure to represent the program. /// -/// Dialects represent a stronger distinction than phases. This is because the transitions between -/// dialects are semantic changes, and therefore technically *lowerings* between distinct IRs. In -/// other words, the same [`Body`](crate::mir::Body) might be well-formed for multiple dialects, but -/// have different semantic meaning and different behavior at runtime. +/// Different MIR dialects have different semantics. (The differences between dialects are small, +/// but they do exist.) The progression from one MIR dialect to the next is technically a lowering +/// from one IR to another. In other words, a single well-formed [`Body`](crate::mir::Body) might +/// have different semantic meaning and different behavior at runtime in the different dialects. +/// The specific differences between dialects are described on the variants below. /// -/// Each dialect additionally has a number of phases. However, phase changes never involve semantic -/// changes. If some MIR is well-formed both before and after a phase change, it is also guaranteed -/// that it has the same semantic meaning. In this sense, phase changes can only add additional -/// restrictions on what MIR is well-formed. +/// Phases exist only to place restrictions on what language constructs are permitted in +/// well-formed MIR, and subsequent phases mostly increase those restrictions. I.e. to convert MIR +/// from one phase to the next might require removing/replacing certain MIR constructs. /// -/// When adding phases, remember to update [`MirPhase::phase_index`]. +/// When adding dialects or phases, remember to update [`MirPhase::phase_index`]. #[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(HashStable)] pub enum MirPhase { - /// The MIR that is generated by MIR building. + /// The "built MIR" dialect, as generated by MIR building. /// /// The only things that operate on this dialect are unsafeck, the various MIR lints, and const /// qualifs. /// - /// This has no distinct phases. + /// This dialect has just the one (implicit) phase, which places few restrictions on what MIR + /// constructs are allowed. Built, - /// The MIR used for most analysis. + + /// The "analysis MIR" dialect, used for borrowck and friends. /// - /// The only semantic change between analysis and built MIR is constant promotion. In built MIR, - /// sequences of statements that would generally be subject to constant promotion are - /// semantically constants, while in analysis MIR all constants are explicit. + /// The only semantic difference between built MIR and analysis MIR relates to constant + /// promotion. In built MIR, sequences of statements that would generally be subject to + /// constant promotion are semantically constants, while in analysis MIR all constants are + /// explicit. /// /// The result of const promotion is available from the `mir_promoted` and `promoted_mir` /// queries. /// - /// This is the version of MIR used by borrowck and friends. + /// The phases of this dialect are described in `AnalysisPhase`. Analysis(AnalysisPhase), - /// The MIR used for CTFE, optimizations, and codegen. + + /// The "runtime MIR" dialect, used for CTFE, optimizations, and codegen. /// - /// The semantic changes that occur in the lowering from analysis to runtime MIR are as follows: + /// The semantic differences between analysis MIR and runtime MIR are as follows. /// /// - Drops: In analysis MIR, `Drop` terminators represent *conditional* drops; roughly /// speaking, if dataflow analysis determines that the place being dropped is uninitialized, @@ -80,13 +85,15 @@ pub enum MirPhase { /// retags can still occur at `Rvalue::{Ref,AddrOf}`). /// - Coroutine bodies: In analysis MIR, locals may actually be behind a pointer that user code /// has access to. This occurs in coroutine bodies. Such locals do not behave like other - /// locals, because they eg may be aliased in surprising ways. Runtime MIR has no such + /// locals, because they e.g. may be aliased in surprising ways. Runtime MIR has no such /// special locals. All coroutine bodies are lowered and so all places that look like locals /// really are locals. /// /// Also note that the lint pass which reports eg `200_u8 + 200_u8` as an error is run as a part /// of analysis to runtime MIR lowering. To ensure lints are reported reliably, this means that - /// transformations which may suppress such errors should not run on analysis MIR. + /// transformations that can suppress such errors should not run on analysis MIR. + /// + /// The phases of this dialect are described in `RuntimePhase`. Runtime(RuntimePhase), } From 83a7fb61fbf2377961d05cad10a699fd954a4bb7 Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Tue, 18 Feb 2025 15:17:50 +1100 Subject: [PATCH 3/3] Improve how the MIR dialect/phase index is reported. The only visible change is to the filenames produce by `-Zdump-mir`. E.g. before and after: ``` h.main.003-000.analysis-post-cleanup.after.mir h.main.2-2-000.analysis-post-cleanup.after.mir ``` It also fixes a FIXME comment. --- compiler/rustc_middle/src/mir/mod.rs | 21 +++++++-------------- compiler/rustc_middle/src/mir/pretty.rs | 3 ++- compiler/rustc_middle/src/mir/syntax.rs | 2 +- 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index 528da4ca05776..096422369dc05 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -99,20 +99,13 @@ impl<'tcx> HasLocalDecls<'tcx> for Body<'tcx> { } impl MirPhase { - /// Gets the index of the current MirPhase within the set of all `MirPhase`s. - /// - /// FIXME(JakobDegen): Return a `(usize, usize)` instead. - pub fn phase_index(&self) -> usize { - const BUILT_PHASE_COUNT: usize = 1; - const ANALYSIS_PHASE_COUNT: usize = 2; - match self { - MirPhase::Built => 1, - MirPhase::Analysis(analysis_phase) => { - 1 + BUILT_PHASE_COUNT + (*analysis_phase as usize) - } - MirPhase::Runtime(runtime_phase) => { - 1 + BUILT_PHASE_COUNT + ANALYSIS_PHASE_COUNT + (*runtime_phase as usize) - } + /// Gets the (dialect, phase) index of the current `MirPhase`. Both numbers + /// are 1-indexed. + pub fn index(&self) -> (usize, usize) { + match *self { + MirPhase::Built => (1, 1), + MirPhase::Analysis(analysis_phase) => (2, 1 + analysis_phase as usize), + MirPhase::Runtime(runtime_phase) => (3, 1 + runtime_phase as usize), } } diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs index 1e3b8d029e1b3..dd3de48fc14fd 100644 --- a/compiler/rustc_middle/src/mir/pretty.rs +++ b/compiler/rustc_middle/src/mir/pretty.rs @@ -231,7 +231,8 @@ fn dump_path<'tcx>( let pass_num = if tcx.sess.opts.unstable_opts.dump_mir_exclude_pass_number { String::new() } else if pass_num { - format!(".{:03}-{:03}", body.phase.phase_index(), body.pass_count) + let (dialect_index, phase_index) = body.phase.index(); + format!(".{}-{}-{:03}", dialect_index, phase_index, body.pass_count) } else { ".-------".to_string() }; diff --git a/compiler/rustc_middle/src/mir/syntax.rs b/compiler/rustc_middle/src/mir/syntax.rs index b63c4d172f2e7..af6f0e4c55183 100644 --- a/compiler/rustc_middle/src/mir/syntax.rs +++ b/compiler/rustc_middle/src/mir/syntax.rs @@ -37,7 +37,7 @@ use crate::ty::{self, GenericArgsRef, List, Region, Ty, UserTypeAnnotationIndex} /// well-formed MIR, and subsequent phases mostly increase those restrictions. I.e. to convert MIR /// from one phase to the next might require removing/replacing certain MIR constructs. /// -/// When adding dialects or phases, remember to update [`MirPhase::phase_index`]. +/// When adding dialects or phases, remember to update [`MirPhase::index`]. #[derive(Copy, Clone, TyEncodable, TyDecodable, Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(HashStable)] pub enum MirPhase {