astral-sh · dcreager · Jan 29, 2025 · Jan 22, 2025 · Jan 22, 2025 · Jan 22, 2025
@@ -0,0 +1,217 @@
+# Terminal statements
+
+## Introduction
+
+Terminal statements (`return`, `raise`, `break`, `continue`) complicate naive control-flow analysis.
+
+As a simple example:
+
+```py
+def f(cond: bool) -> str:
+    if cond:
+        x = "test"
+    else:
+        raise ValueError
+    return x
+
+def g(cond: bool):
+    if cond:
+        x = "test"
+        reveal_type(x)  # revealed: Literal["test"]
+    else:
+        x = "unreachable"
+        reveal_type(x)  # revealed: Literal["unreachable"]
+        raise ValueError
+    reveal_type(x)  # revealed: Literal["test"]
+```
+
+In `f`, we should be able to determine that the `else` branch ends in a terminal statement, and that
+the `return` statement can only be executed when the condition is true. We should therefore consider
+the reference to `x` always bound, even though `x` is only bound in the true branch.
+
+Similarly, in `g`, we should see that the assignment of the value `"unreachable"` can never be seen
+by the final `reveal_type`.
+
+## `return` is terminal
+
+```py
+def f(cond: bool) -> str:
+    if cond:
+        x = "test"
+    else:
+        return "early"
+    return x  # no possibly-unresolved-reference diagnostic!
+
+def g(cond: bool):
+    if cond:
+        x = "test"
+        reveal_type(x)  # revealed: Literal["test"]
+    else:
+        x = "unreachable"
+        reveal_type(x)  # revealed: Literal["unreachable"]
+        return
+    reveal_type(x)  # revealed: Literal["test"]
+```
+
+## `continue` is terminal within its loop scope
+
+TODO: We are not currently modeling the cyclic control flow for loops, pending fixpoint support in
+Salsa. The wrong results in this section are caused by that gap, not by our terminal statement
+support. See [ruff#14160](https://github.com/astral-sh/ruff/issues/14160) for more details.
+
+```py
+def f(cond: bool) -> str:
+    while True:
+        if cond:
+            x = "test"
+        else:
+            continue
+        return x
+
+def g(cond: bool, i: int):
+    x = "before"
+    for _ in range(i):
+        if cond:
+            x = "loop"
+            reveal_type(x)  # revealed: Literal["loop"]
+        else:
+            x = "continue"
+            reveal_type(x)  # revealed: Literal["continue"]
+            continue
+        reveal_type(x)  # revealed: Literal["loop"]
+    # TODO: Should be Literal["before", "loop", "continue"]
+    reveal_type(x)  # revealed: Literal["before", "loop"]
+```
+
+## `break` is terminal within its loop scope
+
+```py
+def f(cond: bool) -> str:
+    while True:
+        if cond:
+            x = "test"
+        else:
+            break
+        return x
+    return x  # error: [unresolved-reference]
+
+def g(cond: bool, i: int):
+    x = "before"
+    for _ in range(i):
+        if cond:
+            x = "loop"
+            reveal_type(x)  # revealed: Literal["loop"]
+        else:
+            x = "break"
+            reveal_type(x)  # revealed: Literal["break"]
+            break
+        reveal_type(x)  # revealed: Literal["loop"]
+    reveal_type(x)  # revealed: Literal["before", "loop", "break"]
+```
+
+## `return` is terminal in nested conditionals
+
+```py
+def f(cond1: bool, cond2: bool) -> str:
+    if cond1:
+        if cond2:
+            x = "test1"
+        else:
+            return "early"
+    else:
+        x = "test2"
+    return x
+
+def g(cond1: bool, cond2: bool):
+    if cond1:
+        if cond2:
+            x = "test1"
+            reveal_type(x)  # revealed: Literal["test1"]
+        else:
+            x = "unreachable"
+            reveal_type(x)  # revealed: Literal["unreachable"]
+            return
+        reveal_type(x)  # revealed: Literal["test1"]
+    else:
+        x = "test2"
+        reveal_type(x)  # revealed: Literal["test2"]
+    reveal_type(x)  # revealed: Literal["test1", "test2"]
+```
+
+## Terminal in a `finally` block
+
+Control flow through `finally` blocks isn't modeled correctly yet:
+
+```py
+def f():
+    x = 1
+    while True:
+        try:
+            break
+        finally:
+            x = 2
+    # TODO: should be Literal[2]
+    reveal_type(x)  # revealed: Literal[1]
+```
+
+## Early returns and nested functions
+
+Free references inside of a function body refer to variables defined in the containing scope.
+Function bodies are _lazy scopes_: at runtime, these references are not resolved immediately at the
+point of the function definition. Instead, they are resolved _at the time of the call_, which means
+that their values (and types) can be different for different invocations. For simplicity, we instead
+resolve free references _at the end of the containing scope_. That means that in the examples below,
+all of the `x` bindings should be visible to the `reveal_type`, regardless of where we place the
+`return` statements.
+
+TODO: These currently produce the wrong results, but not because of our terminal statement support.
+See [ruff#15777](https://github.com/astral-sh/ruff/issues/15777) for more details.
+
+```py
+def top_level_return(cond1: bool, cond2: bool):
+    x = 1
+
+    def g():
+        # TODO eliminate Unknown
+        reveal_type(x)  # revealed: Unknown | Literal[1, 2, 3]
+    if cond1:
+        if cond2:
+            x = 2
+        else:
+            x = 3
+    return
+
+def return_from_if(cond1: bool, cond2: bool):
+    x = 1
+
+    def g():
+        # TODO: Literal[1, 2, 3]
+        reveal_type(x)  # revealed: Unknown | Literal[1]
+    if cond1:
+        if cond2:
+            x = 2
+        else:
+            x = 3
+        return
+
+def return_from_nested_if(cond1: bool, cond2: bool):
+    x = 1
+
+    def g():
+        # TODO: Literal[1, 2, 3]
+        reveal_type(x)  # revealed: Unknown | Literal[1, 3]
+    if cond1:
+        if cond2:
+            x = 2
+            return
+        else:
+            x = 3
+```
+
+## Early returns and list comprehensions
+
+```py
+def f(x: str) -> int:
+    y = [x for i in range(len(x))]
+    return 4
+```
@@ -368,6 +368,12 @@ impl<'db> SemanticIndexBuilder<'db> {
             .record_visibility_constraint(VisibilityConstraint::VisibleIf(constraint))
     }
 
+    /// Marks the rest of the current block as unreachable by forwarding to the current use-def map
+    /// builder; bindings made in unreachable code are therefore not visible afterwards.
+    fn mark_unreachable(&mut self) {
+        self.current_use_def_map_mut().mark_unreachable();
+    }
+
     /// Records a [`VisibilityConstraint::Ambiguous`] constraint.
     fn record_ambiguous_visibility(&mut self) -> ScopedVisibilityConstraintId {
         self.current_use_def_map_mut()
@@ -1019,11 +1025,6 @@ where
                 }
                 self.visit_body(body);
             }
-            ast::Stmt::Break(_) => {
-                if self.loop_state().is_inside() {
-                    self.loop_break_states.push(self.flow_snapshot());
-                }
-            }
 
             ast::Stmt::For(
                 for_stmt @ ast::StmtFor {
@@ -1270,6 +1271,21 @@ where
                 // - https://github.com/astral-sh/ruff/pull/13633#discussion_r1788626702
                 self.visit_body(finalbody);
             }
+
+            ast::Stmt::Raise(_) | ast::Stmt::Return(_) | ast::Stmt::Continue(_) => {
+                walk_stmt(self, stmt);
+                // Everything in the current block after a terminal statement is unreachable.
+                self.mark_unreachable();
+            }
+
+            ast::Stmt::Break(_) => {
+                if self.loop_state().is_inside() {
+                    self.loop_break_states.push(self.flow_snapshot());
+                }
+                // Everything in the current block after a terminal statement is unreachable.
+                self.mark_unreachable();
+            }
+
             _ => {
                 walk_stmt(self, stmt);
             }

@@ -476,6 +476,7 @@ impl std::iter::FusedIterator for DeclarationsIterator<'_, '_> {}
 pub(super) struct FlowSnapshot {
     symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
     scope_start_visibility: ScopedVisibilityConstraintId,
+    reachable: bool,
 }
 
 #[derive(Debug)]
@@ -503,6 +504,8 @@ pub(super) struct UseDefMapBuilder<'db> {
 
     /// Currently live bindings and declarations for each symbol.
     symbol_states: IndexVec<ScopedSymbolId, SymbolState>,
+
+    reachable: bool,
 }
 
 impl Default for UseDefMapBuilder<'_> {
@@ -515,11 +518,16 @@ impl Default for UseDefMapBuilder<'_> {
             bindings_by_use: IndexVec::new(),
             definitions_by_definition: FxHashMap::default(),
             symbol_states: IndexVec::new(),
+            reachable: true,
         }
     }
 }
 
 impl<'db> UseDefMapBuilder<'db> {
+    pub(super) fn mark_unreachable(&mut self) {
+        self.reachable = false; // `merge` discards unreachable states instead of unioning them
+    }
+
     pub(super) fn add_symbol(&mut self, symbol: ScopedSymbolId) {
         let new_symbol = self
             .symbol_states
@@ -656,6 +664,7 @@ impl<'db> UseDefMapBuilder<'db> {
         FlowSnapshot {
             symbol_states: self.symbol_states.clone(),
             scope_start_visibility: self.scope_start_visibility,
+            reachable: self.reachable,
         }
     }
 
@@ -678,12 +687,25 @@ impl<'db> UseDefMapBuilder<'db> {
             num_symbols,
             SymbolState::undefined(self.scope_start_visibility),
         );
+
+        self.reachable = snapshot.reachable;
     }
 
     /// Merge the given snapshot into the current state, reflecting that we might have taken either
     /// path to get here. The new state for each symbol should include definitions from both the
     /// prior state and the snapshot.
     pub(super) fn merge(&mut self, snapshot: FlowSnapshot) {
+        // Unreachable snapshots should not be merged: If the current snapshot is unreachable, it
+        // should be completely overwritten by the snapshot we're merging in. If the other snapshot
+        // is unreachable, we should return without merging.
+        if !snapshot.reachable {
+            return;
+        }
+        if !self.reachable {
+            self.restore(snapshot);
+            return;
+        }
+
         // We never remove symbols from `symbol_states` (it's an IndexVec, and the symbol
         // IDs must line up), so the current number of known symbols must always be equal to or
         // greater than the number of known symbols in a previously-taken snapshot.
@@ -705,6 +727,9 @@ impl<'db> UseDefMapBuilder<'db> {
         self.scope_start_visibility = self
             .visibility_constraints
             .add_or_constraint(self.scope_start_visibility, snapshot.scope_start_visibility);
+
+        // Both sides are reachable here (see the early returns), so the merged result is too.
+        self.reachable = true;
     }
 
     pub(super) fn finish(mut self) -> UseDefMap<'db> {

diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs
@@ -26,25 +26,8 @@ const TOMLLIB_312_URL: &str = "https://raw.githubusercontent.com/python/cpython/
 static EXPECTED_DIAGNOSTICS: &[&str] = &[
     // We don't support `*` imports yet:
     "error[lint:unresolved-import] /src/tomllib/_parser.py:7:29 Module `collections.abc` has no member `Iterable`",
-    // We don't support terminal statements in control flow yet:
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:66:18 Name `s` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:98:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:101:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:104:14 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:115:14 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:126:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:348:20 Name `nest` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:353:5 Name `nest` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:453:24 Name `nest` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:455:9 Name `nest` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:482:16 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:566:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:573:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:579:12 Name `char` used when possibly not defined",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:580:63 Name `char` used when possibly not defined",
     // We don't handle intersections in `is_assignable_to` yet
     "error[lint:invalid-argument-type] /src/tomllib/_parser.py:626:46 Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_datetime`; expected type `Match`",
-    "warning[lint:possibly-unresolved-reference] /src/tomllib/_parser.py:629:38 Name `datetime_obj` used when possibly not defined",
     "error[lint:invalid-argument-type] /src/tomllib/_parser.py:632:58 Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_localtime`; expected type `Match`",
     "error[lint:invalid-argument-type] /src/tomllib/_parser.py:639:52 Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_number`; expected type `Match`",
     "warning[lint:unused-ignore-comment] /src/tomllib/_parser.py:682:31 Unused blanket `type: ignore` directive",