diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md index 75c311800b685..138478dbdf677 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/literal.md @@ -68,7 +68,7 @@ def x( a3: Literal[Literal["w"], Literal["r"], Literal[Literal["w+"]]], a4: Literal[True] | Literal[1, 2] | Literal["foo"], ): - reveal_type(a1) # revealed: Literal[1, 2, 3, "foo", 5] | None + reveal_type(a1) # revealed: Literal[1, 2, 3, 5, "foo"] | None reveal_type(a2) # revealed: Literal["w", "r"] reveal_type(a3) # revealed: Literal["w", "r", "w+"] reveal_type(a4) # revealed: Literal[True, 1, 2, "foo"] @@ -108,7 +108,7 @@ def union_example( None, ], ): - reveal_type(x) # revealed: Unknown | Literal[-1, "A", b"A", b"\x00", b"\x07", 0, 1, "B", "foo", "bar", True] | None + reveal_type(x) # revealed: Unknown | Literal[-1, 0, 1, "A", "B", "foo", "bar", b"A", b"\x00", b"\x07", True] | None ``` ## Detecting Literal outside typing and typing_extensions diff --git a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md index a1b47a30c7747..f44c000855ea6 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md +++ b/crates/red_knot_python_semantic/resources/mdtest/annotations/string.md @@ -105,7 +105,7 @@ def f1( from typing import Literal def f(v: Literal["a", r"b", b"c", "d" "e", "\N{LATIN SMALL LETTER F}", "\x67", """h"""]): - reveal_type(v) # revealed: Literal["a", "b", b"c", "de", "f", "g", "h"] + reveal_type(v) # revealed: Literal["a", "b", "de", "f", "g", "h", b"c"] ``` ## Class variables diff --git a/crates/red_knot_python_semantic/src/types/builder.rs b/crates/red_knot_python_semantic/src/types/builder.rs index c932474703500..43f46cf715c50 100644 --- a/crates/red_knot_python_semantic/src/types/builder.rs +++ b/crates/red_knot_python_semantic/src/types/builder.rs @@ -12,6 +12,11 @@ //! flattens into the outer one), intersections cannot contain other intersections (also //! flattens), and intersections cannot contain unions (the intersection distributes over the //! union, inverting it into a union-of-intersections). +//! * No type in a union can be a subtype of any other type in the union (just eliminate the +//! subtype from the union). +//! * No type in an intersection can be a supertype of any other type in the intersection (just +//! eliminate the supertype from the intersection). +//! * An intersection containing two non-overlapping types simplifies to [`Type::Never`]. //! //! The implication of these invariants is that a [`UnionBuilder`] does not necessarily build a //! [`Type::Union`]. For example, if only one type is added to the [`UnionBuilder`], `build()` will @@ -19,19 +24,35 @@ //! union type is added to the intersection, it will distribute and [`IntersectionBuilder::build`] //! may end up returning a [`Type::Union`] of intersections. //! -//! In the future we should have these additional invariants, but they aren't implemented yet: -//! * No type in a union can be a subtype of any other type in the union (just eliminate the -//! subtype from the union). -//! * No type in an intersection can be a supertype of any other type in the intersection (just -//! eliminate the supertype from the intersection). -//! * An intersection containing two non-overlapping types should simplify to [`Type::Never`]. - -use crate::types::{IntersectionType, KnownClass, Type, TypeVarBoundOrConstraints, UnionType}; +//! ## Performance +//! +//! In practice, there are two kinds of unions found in the wild: relatively-small unions made up +//! of normal user types (classes, etc), and large unions made up of literals, which can occur via +//! large enums (not yet implemented) or from string/integer/bytes literals, which can grow due to +//! literal arithmetic or operations on literal strings/bytes. For normal unions, it's most +//! efficient to just store the member types in a vector, and do O(n^2) `is_subtype_of` checks to +//! maintain the union in simplified form. But literal unions can grow to a size where this becomes +//! a performance problem. For this reason, we group literal types in `UnionBuilder`. Since every +//! different string literal type shares exactly the same possible super-types, and none of them +//! are subtypes of each other (unless exactly the same literal type), we can avoid many +//! unnecessary `is_subtype_of` checks. + +use crate::types::{ + BytesLiteralType, IntersectionType, KnownClass, StringLiteralType, Type, + TypeVarBoundOrConstraints, UnionType, +}; use crate::{Db, FxOrderSet}; use smallvec::SmallVec; +enum UnionElement<'db> { + IntLiterals(FxOrderSet), + StringLiterals(FxOrderSet>), + BytesLiterals(FxOrderSet>), + Type(Type<'db>), +} + pub(crate) struct UnionBuilder<'db> { - elements: Vec>, + elements: Vec>, db: &'db dyn Db, } @@ -50,7 +71,8 @@ impl<'db> UnionBuilder<'db> { /// Collapse the union to a single type: `object`. fn collapse_to_object(mut self) -> Self { self.elements.clear(); - self.elements.push(Type::object(self.db)); + self.elements + .push(UnionElement::Type(Type::object(self.db))); self } @@ -66,6 +88,76 @@ impl<'db> UnionBuilder<'db> { } // Adding `Never` to a union is a no-op. Type::Never => {} + // If adding a string literal, look for an existing `UnionElement::StringLiterals` to + // add it to, or an existing element that is a super-type of string literals, which + // means we shouldn't add it. Otherwise, add a new `UnionElement::StringLiterals` + // containing it. + Type::StringLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::StringLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + self.elements + .push(UnionElement::StringLiterals(FxOrderSet::from_iter([ + literal, + ]))); + } + } + // Same for bytes literals as for string literals, above. + Type::BytesLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::BytesLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + self.elements + .push(UnionElement::BytesLiterals(FxOrderSet::from_iter([ + literal, + ]))); + } + } + // And same for int literals as well. + Type::IntLiteral(literal) => { + let mut found = false; + for element in &mut self.elements { + match element { + UnionElement::IntLiterals(literals) => { + literals.insert(literal); + found = true; + break; + } + UnionElement::Type(existing) if ty.is_subtype_of(self.db, *existing) => { + return self; + } + _ => {} + } + } + if !found { + self.elements + .push(UnionElement::IntLiterals(FxOrderSet::from_iter([literal]))); + } + } // Adding `object` to a union results in `object`. ty if ty.is_object(self.db) => { return self.collapse_to_object(); @@ -81,8 +173,27 @@ impl<'db> UnionBuilder<'db> { let mut to_remove = SmallVec::<[usize; 2]>::new(); let ty_negated = ty.negate(self.db); - for (index, element) in self.elements.iter().enumerate() { - if Some(*element) == bool_pair { + for (index, element) in self + .elements + .iter() + .map(|element| { + // For literals, the first element in the set can stand in for all the rest, + // since they all have the same super-types. SAFETY: a `UnionElement` of + // literal kind must always have at least one element in it. + match element { + UnionElement::IntLiterals(literals) => Type::IntLiteral(literals[0]), + UnionElement::StringLiterals(literals) => { + Type::StringLiteral(literals[0]) + } + UnionElement::BytesLiterals(literals) => { + Type::BytesLiteral(literals[0]) + } + UnionElement::Type(ty) => *ty, + } + }) + .enumerate() + { + if Some(element) == bool_pair { to_add = KnownClass::Bool.to_instance(self.db); to_remove.push(index); // The type we are adding is a BooleanLiteral, which doesn't have any @@ -92,14 +203,14 @@ impl<'db> UnionBuilder<'db> { break; } - if ty.is_same_gradual_form(*element) - || ty.is_subtype_of(self.db, *element) + if ty.is_same_gradual_form(element) + || ty.is_subtype_of(self.db, element) || element.is_object(self.db) { return self; } else if element.is_subtype_of(self.db, ty) { to_remove.push(index); - } else if ty_negated.is_subtype_of(self.db, *element) { + } else if ty_negated.is_subtype_of(self.db, element) { // We add `ty` to the union. We just checked that `~ty` is a subtype of an existing `element`. // This also means that `~ty | ty` is a subtype of `element | ty`, because both elements in the // first union are subtypes of the corresponding elements in the second union. But `~ty | ty` is @@ -111,13 +222,13 @@ impl<'db> UnionBuilder<'db> { } } if let Some((&first, rest)) = to_remove.split_first() { - self.elements[first] = to_add; + self.elements[first] = UnionElement::Type(to_add); // We iterate in descending order to keep remaining indices valid after `swap_remove`. for &index in rest.iter().rev() { self.elements.swap_remove(index); } } else { - self.elements.push(to_add); + self.elements.push(UnionElement::Type(to_add)); } } } @@ -125,10 +236,25 @@ impl<'db> UnionBuilder<'db> { } pub(crate) fn build(self) -> Type<'db> { - match self.elements.len() { + let mut types = vec![]; + for element in self.elements { + match element { + UnionElement::IntLiterals(literals) => { + types.extend(literals.into_iter().map(Type::IntLiteral)); + } + UnionElement::StringLiterals(literals) => { + types.extend(literals.into_iter().map(Type::StringLiteral)); + } + UnionElement::BytesLiterals(literals) => { + types.extend(literals.into_iter().map(Type::BytesLiteral)); + } + UnionElement::Type(ty) => types.push(ty), + } + } + match types.len() { 0 => Type::Never, - 1 => self.elements[0], - _ => Type::Union(UnionType::new(self.db, self.elements.into_boxed_slice())), + 1 => types[0], + _ => Type::Union(UnionType::new(self.db, types.into_boxed_slice())), } } }