Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def x(
a3: Literal[Literal["w"], Literal["r"], Literal[Literal["w+"]]],
a4: Literal[True] | Literal[1, 2] | Literal["foo"],
):
reveal_type(a1) # revealed: Literal[1, 2, 3, "foo", 5] | None
reveal_type(a1) # revealed: Literal[1, 2, 3, 5, "foo"] | None
reveal_type(a2) # revealed: Literal["w", "r"]
reveal_type(a3) # revealed: Literal["w", "r", "w+"]
reveal_type(a4) # revealed: Literal[True, 1, 2, "foo"]
Expand Down Expand Up @@ -108,7 +108,7 @@ def union_example(
None,
],
):
reveal_type(x) # revealed: Unknown | Literal[-1, "A", b"A", b"\x00", b"\x07", 0, 1, "B", "foo", "bar", True] | None
reveal_type(x) # revealed: Unknown | Literal[-1, 0, 1, "A", "B", "foo", "bar", b"A", b"\x00", b"\x07", True] | None
```

## Detecting Literal outside typing and typing_extensions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def f1(
from typing import Literal

def f(v: Literal["a", r"b", b"c", "d" "e", "\N{LATIN SMALL LETTER F}", "\x67", """h"""]):
reveal_type(v) # revealed: Literal["a", "b", b"c", "de", "f", "g", "h"]
reveal_type(v) # revealed: Literal["a", "b", "de", "f", "g", "h", b"c"]
```

## Class variables
Expand Down
166 changes: 146 additions & 20 deletions crates/red_knot_python_semantic/src/types/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,47 @@
//! flattens into the outer one), intersections cannot contain other intersections (also
//! flattens), and intersections cannot contain unions (the intersection distributes over the
//! union, inverting it into a union-of-intersections).
//! * No type in a union can be a subtype of any other type in the union (just eliminate the
//! subtype from the union).
//! * No type in an intersection can be a supertype of any other type in the intersection (just
//! eliminate the supertype from the intersection).
//! * An intersection containing two non-overlapping types simplifies to [`Type::Never`].
//!
//! The implication of these invariants is that a [`UnionBuilder`] does not necessarily build a
//! [`Type::Union`]. For example, if only one type is added to the [`UnionBuilder`], `build()` will
//! just return that type directly. The same is true for [`IntersectionBuilder`]; for example, if a
//! union type is added to the intersection, it will distribute and [`IntersectionBuilder::build`]
//! may end up returning a [`Type::Union`] of intersections.
//!
//! In the future we should have these additional invariants, but they aren't implemented yet:
//! * No type in a union can be a subtype of any other type in the union (just eliminate the
//! subtype from the union).
//! * No type in an intersection can be a supertype of any other type in the intersection (just
//! eliminate the supertype from the intersection).
//! * An intersection containing two non-overlapping types should simplify to [`Type::Never`].

use crate::types::{IntersectionType, KnownClass, Type, TypeVarBoundOrConstraints, UnionType};
//! ## Performance
//!
//! In practice, there are two kinds of unions found in the wild: relatively-small unions made up
//! of normal user types (classes, etc), and large unions made up of literals, which can occur via
//! large enums (not yet implemented) or from string/integer/bytes literals, which can grow due to
//! literal arithmetic or operations on literal strings/bytes. For normal unions, it's most
//! efficient to just store the member types in a vector, and do O(n^2) `is_subtype_of` checks to
//! maintain the union in simplified form. But literal unions can grow to a size where this becomes
//! a performance problem. For this reason, we group literal types in `UnionBuilder`. Since every
//! different string literal type shares exactly the same possible super-types, and none of them
//! are subtypes of each other (unless exactly the same literal type), we can avoid many
//! unnecessary `is_subtype_of` checks.

use crate::types::{
BytesLiteralType, IntersectionType, KnownClass, StringLiteralType, Type,
TypeVarBoundOrConstraints, UnionType,
};
use crate::{Db, FxOrderSet};
use smallvec::SmallVec;

enum UnionElement<'db> {
IntLiterals(FxOrderSet<i64>),
StringLiterals(FxOrderSet<StringLiteralType<'db>>),
BytesLiterals(FxOrderSet<BytesLiteralType<'db>>),
Type(Type<'db>),
}

pub(crate) struct UnionBuilder<'db> {
elements: Vec<Type<'db>>,
elements: Vec<UnionElement<'db>>,
db: &'db dyn Db,
}

Expand All @@ -50,7 +71,8 @@ impl<'db> UnionBuilder<'db> {
/// Collapse the union to a single type: `object`.
fn collapse_to_object(mut self) -> Self {
self.elements.clear();
self.elements.push(Type::object(self.db));
self.elements
.push(UnionElement::Type(Type::object(self.db)));
self
}

Expand All @@ -66,6 +88,76 @@ impl<'db> UnionBuilder<'db> {
}
// Adding `Never` to a union is a no-op.
Type::Never => {}
// If adding a string literal, look for an existing `UnionElement::StringLiterals` to
// add it to, or an existing element that is a super-type of string literals, which
// means we shouldn't add it. Otherwise, add a new `UnionElement::StringLiterals`
// containing it.
Type::StringLiteral(literal) => {
let mut found = false;
for element in &mut self.elements {
match element {
UnionElement::StringLiterals(literals) => {
literals.insert(literal);
found = true;
break;
}
UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => {
return self;
}
_ => {}
}
}
if !found {
self.elements
.push(UnionElement::StringLiterals(FxOrderSet::from_iter([
literal,
])));
}
}
// Same for bytes literals as for string literals, above.
Type::BytesLiteral(literal) => {
let mut found = false;
for element in &mut self.elements {
match element {
UnionElement::BytesLiterals(literals) => {
literals.insert(literal);
found = true;
break;
}
UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => {
return self;
}
_ => {}
}
}
if !found {
self.elements
.push(UnionElement::BytesLiterals(FxOrderSet::from_iter([
literal,
])));
}
}
// And same for int literals as well.
Type::IntLiteral(literal) => {
let mut found = false;
for element in &mut self.elements {
match element {
UnionElement::IntLiterals(literals) => {
literals.insert(literal);
found = true;
break;
}
UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => {
return self;
}
_ => {}
}
}
if !found {
self.elements
.push(UnionElement::IntLiterals(FxOrderSet::from_iter([literal])));
}
}
// Adding `object` to a union results in `object`.
ty if ty.is_object(self.db) => {
return self.collapse_to_object();
Expand All @@ -81,8 +173,27 @@ impl<'db> UnionBuilder<'db> {
let mut to_remove = SmallVec::<[usize; 2]>::new();
let ty_negated = ty.negate(self.db);

for (index, element) in self.elements.iter().enumerate() {
if Some(*element) == bool_pair {
for (index, element) in self
.elements
.iter()
.map(|element| {
// For literals, the first element in the set can stand in for all the rest,
// since they all have the same super-types. SAFETY: a `UnionElement` of
// literal kind must always have at least one element in it.
match element {
UnionElement::IntLiterals(literals) => Type::IntLiteral(literals[0]),
UnionElement::StringLiterals(literals) => {
Type::StringLiteral(literals[0])
}
UnionElement::BytesLiterals(literals) => {
Type::BytesLiteral(literals[0])
}
UnionElement::Type(ty) => *ty,
}
})
.enumerate()
{
if Some(element) == bool_pair {
to_add = KnownClass::Bool.to_instance(self.db);
to_remove.push(index);
// The type we are adding is a BooleanLiteral, which doesn't have any
Expand All @@ -92,14 +203,14 @@ impl<'db> UnionBuilder<'db> {
break;
}

if ty.is_same_gradual_form(*element)
|| ty.is_subtype_of(self.db, *element)
if ty.is_same_gradual_form(element)
|| ty.is_subtype_of(self.db, element)
|| element.is_object(self.db)
{
return self;
} else if element.is_subtype_of(self.db, ty) {
to_remove.push(index);
} else if ty_negated.is_subtype_of(self.db, *element) {
} else if ty_negated.is_subtype_of(self.db, element) {
// We add `ty` to the union. We just checked that `~ty` is a subtype of an existing `element`.
// This also means that `~ty | ty` is a subtype of `element | ty`, because both elements in the
// first union are subtypes of the corresponding elements in the second union. But `~ty | ty` is
Expand All @@ -111,24 +222,39 @@ impl<'db> UnionBuilder<'db> {
}
}
if let Some((&first, rest)) = to_remove.split_first() {
self.elements[first] = to_add;
self.elements[first] = UnionElement::Type(to_add);
// We iterate in descending order to keep remaining indices valid after `swap_remove`.
for &index in rest.iter().rev() {
self.elements.swap_remove(index);
}
} else {
self.elements.push(to_add);
self.elements.push(UnionElement::Type(to_add));
}
}
}
self
}

pub(crate) fn build(self) -> Type<'db> {
match self.elements.len() {
let mut types = vec![];
for element in self.elements {
match element {
UnionElement::IntLiterals(literals) => {
types.extend(literals.into_iter().map(Type::IntLiteral));
}
UnionElement::StringLiterals(literals) => {
types.extend(literals.into_iter().map(Type::StringLiteral));
}
UnionElement::BytesLiterals(literals) => {
types.extend(literals.into_iter().map(Type::BytesLiteral));
}
UnionElement::Type(ty) => types.push(ty),
}
}
match types.len() {
0 => Type::Never,
1 => self.elements[0],
_ => Type::Union(UnionType::new(self.db, self.elements.into_boxed_slice())),
1 => types[0],
_ => Type::Union(UnionType::new(self.db, types.into_boxed_slice())),
}
}
}
Expand Down
Loading