From d8cf8ac2ef26bb630b43b095f61662173b2bac2f Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 19 Jul 2024 17:44:56 +0100 Subject: [PATCH] [red-knot] Resolve symbols from `builtins.pyi` in the stdlib if they cannot be found in other scopes (#12390) Co-authored-by: Carl Meyer --- crates/red_knot_module_resolver/src/path.rs | 8 +- .../red_knot_module_resolver/src/resolver.rs | 85 +++++++++++++ .../red_knot_python_semantic/src/builtins.rs | 16 +++ crates/red_knot_python_semantic/src/db.rs | 2 + crates/red_knot_python_semantic/src/lib.rs | 1 + crates/red_knot_python_semantic/src/types.rs | 10 ++ .../src/types/display.rs | 8 +- .../src/types/infer.rs | 112 +++++++++++++++++- 8 files changed, 231 insertions(+), 11 deletions(-) create mode 100644 crates/red_knot_python_semantic/src/builtins.rs diff --git a/crates/red_knot_module_resolver/src/path.rs b/crates/red_knot_module_resolver/src/path.rs index 87516173065a2..d0577a5055266 100644 --- a/crates/red_knot_module_resolver/src/path.rs +++ b/crates/red_knot_module_resolver/src/path.rs @@ -233,10 +233,16 @@ impl ModuleResolutionPathBuf { ModuleResolutionPathRef::from(self).is_directory(search_path, resolver) } - pub(crate) fn is_site_packages(&self) -> bool { + #[must_use] + pub(crate) const fn is_site_packages(&self) -> bool { matches!(self.0, ModuleResolutionPathBufInner::SitePackages(_)) } + #[must_use] + pub(crate) const fn is_standard_library(&self) -> bool { + matches!(self.0, ModuleResolutionPathBufInner::StandardLibrary(_)) + } + #[must_use] pub(crate) fn with_pyi_extension(&self) -> Self { ModuleResolutionPathRef::from(self).with_pyi_extension() diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 9da957010ad9f..8a281763522a3 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::iter::FusedIterator; use std::sync::Arc; +use once_cell::sync::Lazy; use rustc_hash::{FxBuildHasher, FxHashSet}; use ruff_db::files::{File, FilePath}; @@ -442,6 +443,52 @@ pub(crate) mod internal { } } +/// Modules that are builtin to the Python interpreter itself. +/// +/// When these module names are imported, standard module resolution is bypassed: +/// the module name always resolves to the stdlib module, +/// even if there's a module of the same name in the workspace root +/// (which would normally result in the stdlib module being overridden). +/// +/// TODO(Alex): write a script to generate this list, +/// similar to what we do in `crates/ruff_python_stdlib/src/sys.rs` +static BUILTIN_MODULES: Lazy> = Lazy::new(|| { + const BUILTIN_MODULE_NAMES: &[&str] = &[ + "_abc", + "_ast", + "_codecs", + "_collections", + "_functools", + "_imp", + "_io", + "_locale", + "_operator", + "_signal", + "_sre", + "_stat", + "_string", + "_symtable", + "_thread", + "_tokenize", + "_tracemalloc", + "_typing", + "_warnings", + "_weakref", + "atexit", + "builtins", + "errno", + "faulthandler", + "gc", + "itertools", + "marshal", + "posix", + "pwd", + "sys", + "time", + ]; + BUILTIN_MODULE_NAMES.iter().copied().collect() +}); + /// Given a module name and a list of search paths in which to lookup modules, /// attempt to resolve the module name fn resolve_name( @@ -450,8 +497,12 @@ fn resolve_name( ) -> Option<(Arc, File, ModuleKind)> { let resolver_settings = module_resolution_settings(db); let resolver_state = ResolverState::new(db, resolver_settings.target_version()); + let is_builtin_module = BUILTIN_MODULES.contains(&name.as_str()); for search_path in resolver_settings.search_paths(db) { + if is_builtin_module && !search_path.is_standard_library() { + continue; + } let mut components = name.components(); let module_name = components.next_back()?; @@ -629,6 +680,40 @@ mod tests { ); } + #[test] + fn builtins_vendored() { + let TestCase { db, stdlib, .. } = TestCaseBuilder::new() + .with_vendored_typeshed() + .with_src_files(&[("builtins.py", "FOOOO = 42")]) + .build(); + + let builtins_module_name = ModuleName::new_static("builtins").unwrap(); + let builtins = resolve_module(&db, builtins_module_name).expect("builtins to resolve"); + + assert_eq!(builtins.file().path(&db), &stdlib.join("builtins.pyi")); + } + + #[test] + fn builtins_custom() { + const TYPESHED: MockedTypeshed = MockedTypeshed { + stdlib_files: &[("builtins.pyi", "def min(a, b): ...")], + versions: "builtins: 3.8-", + }; + + const SRC: &[FileSpec] = &[("builtins.py", "FOOOO = 42")]; + + let TestCase { db, stdlib, .. } = TestCaseBuilder::new() + .with_src_files(SRC) + .with_custom_typeshed(TYPESHED) + .with_target_version(TargetVersion::Py38) + .build(); + + let builtins_module_name = ModuleName::new_static("builtins").unwrap(); + let builtins = resolve_module(&db, builtins_module_name).expect("builtins to resolve"); + + assert_eq!(builtins.file().path(&db), &stdlib.join("builtins.pyi")); + } + #[test] fn stdlib() { const TYPESHED: MockedTypeshed = MockedTypeshed { diff --git a/crates/red_knot_python_semantic/src/builtins.rs b/crates/red_knot_python_semantic/src/builtins.rs new file mode 100644 index 0000000000000..3eb0f5f7361a3 --- /dev/null +++ b/crates/red_knot_python_semantic/src/builtins.rs @@ -0,0 +1,16 @@ +use red_knot_module_resolver::{resolve_module, ModuleName}; + +use crate::semantic_index::global_scope; +use crate::semantic_index::symbol::ScopeId; +use crate::Db; + +/// Salsa query to get the builtins scope. +/// +/// Can return None if a custom typeshed is used that is missing `builtins.pyi`. +#[salsa::tracked] +pub(crate) fn builtins_scope(db: &dyn Db) -> Option> { + let builtins_name = + ModuleName::new_static("builtins").expect("Expected 'builtins' to be a valid module name"); + let builtins_file = resolve_module(db.upcast(), builtins_name)?.file(); + Some(global_scope(db, builtins_file)) +} diff --git a/crates/red_knot_python_semantic/src/db.rs b/crates/red_knot_python_semantic/src/db.rs index 7fabc88725142..e2ca1d22ccc33 100644 --- a/crates/red_knot_python_semantic/src/db.rs +++ b/crates/red_knot_python_semantic/src/db.rs @@ -3,6 +3,7 @@ use salsa::DbWithJar; use red_knot_module_resolver::Db as ResolverDb; use ruff_db::{Db as SourceDb, Upcast}; +use crate::builtins::builtins_scope; use crate::semantic_index::definition::Definition; use crate::semantic_index::expression::Expression; use crate::semantic_index::symbol::ScopeId; @@ -28,6 +29,7 @@ pub struct Jar( infer_definition_types, infer_expression_types, infer_scope_types, + builtins_scope, ); /// Database giving access to semantic information about a Python program. diff --git a/crates/red_knot_python_semantic/src/lib.rs b/crates/red_knot_python_semantic/src/lib.rs index 6d0de8fb83455..236b0aa534030 100644 --- a/crates/red_knot_python_semantic/src/lib.rs +++ b/crates/red_knot_python_semantic/src/lib.rs @@ -6,6 +6,7 @@ pub use db::{Db, Jar}; pub use semantic_model::{HasTy, SemanticModel}; pub mod ast_node_ref; +mod builtins; mod db; mod node_key; pub mod semantic_index; diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index f0b30348e63fb..b78cfc3bd1d7d 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -1,6 +1,7 @@ use ruff_db::files::File; use ruff_python_ast::name::Name; +use crate::builtins::builtins_scope; use crate::semantic_index::definition::Definition; use crate::semantic_index::symbol::{ScopeId, ScopedSymbolId}; use crate::semantic_index::{global_scope, symbol_table, use_def_map}; @@ -47,6 +48,15 @@ pub(crate) fn global_symbol_ty_by_name<'db>(db: &'db dyn Db, file: File, name: & symbol_ty_by_name(db, global_scope(db, file), name) } +/// Shorthand for `symbol_ty` that looks up a symbol in the builtins. +/// +/// Returns `None` if the builtins module isn't available for some reason. +pub(crate) fn builtins_symbol_ty_by_name<'db>(db: &'db dyn Db, name: &str) -> Type<'db> { + builtins_scope(db) + .map(|builtins| symbol_ty_by_name(db, builtins, name)) + .unwrap_or(Type::Unbound) +} + /// Infer the type of a [`Definition`]. pub(crate) fn definition_ty<'db>(db: &'db dyn Db, definition: Definition<'db>) -> Type<'db> { let inference = infer_definition_types(db, definition); diff --git a/crates/red_knot_python_semantic/src/types/display.rs b/crates/red_knot_python_semantic/src/types/display.rs index d42119e4b724a..42850e9e4c82e 100644 --- a/crates/red_knot_python_semantic/src/types/display.rs +++ b/crates/red_knot_python_semantic/src/types/display.rs @@ -29,13 +29,9 @@ impl Display for DisplayType<'_> { write!(f, "", file.path(self.db.upcast())) } // TODO functions and classes should display using a fully qualified name - Type::Class(class) => { - f.write_str("Literal[")?; - f.write_str(&class.name(self.db))?; - f.write_str("]") - } + Type::Class(class) => write!(f, "Literal[{}]", class.name(self.db)), Type::Instance(class) => f.write_str(&class.name(self.db)), - Type::Function(function) => f.write_str(&function.name(self.db)), + Type::Function(function) => write!(f, "Literal[{}]", function.name(self.db)), Type::Union(union) => union.display(self.db).fmt(f), Type::Intersection(intersection) => intersection.display(self.db).fmt(f), Type::IntLiteral(n) => write!(f, "Literal[{n}]"), diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 75e7a34f13cca..59e1c91c0c641 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -29,15 +29,16 @@ use ruff_db::parsed::parsed_module; use ruff_python_ast as ast; use ruff_python_ast::{ExprContext, TypeParams}; +use crate::builtins::builtins_scope; use crate::semantic_index::ast_ids::{HasScopedAstId, HasScopedUseId, ScopedExpressionId}; use crate::semantic_index::definition::{Definition, DefinitionKind, DefinitionNodeKey}; use crate::semantic_index::expression::Expression; use crate::semantic_index::semantic_index; -use crate::semantic_index::symbol::NodeWithScopeKind; -use crate::semantic_index::symbol::{NodeWithScopeRef, ScopeId}; +use crate::semantic_index::symbol::{FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId}; use crate::semantic_index::SemanticIndex; use crate::types::{ - definitions_ty, global_symbol_ty_by_name, ClassType, FunctionType, Name, Type, UnionTypeBuilder, + builtins_symbol_ty_by_name, definitions_ty, global_symbol_ty_by_name, ClassType, FunctionType, + Name, Type, UnionTypeBuilder, }; use crate::Db; @@ -686,7 +687,18 @@ impl<'db> TypeInferenceBuilder<'db> { let symbol = symbols.symbol_by_name(id).unwrap(); if !symbol.is_defined() || !self.scope.is_function_like(self.db) { // implicit global - Some(global_symbol_ty_by_name(self.db, self.file, id)) + let mut unbound_ty = if file_scope_id == FileScopeId::global() { + Type::Unbound + } else { + global_symbol_ty_by_name(self.db, self.file, id) + }; + // fallback to builtins + if matches!(unbound_ty, Type::Unbound) + && Some(self.scope) != builtins_scope(self.db) + { + unbound_ty = builtins_symbol_ty_by_name(self.db, id); + } + Some(unbound_ty) } else { Some(Type::Unbound) } @@ -792,6 +804,7 @@ mod tests { use ruff_db::testing::assert_function_query_was_not_run; use ruff_python_ast::name::Name; + use crate::builtins::builtins_scope; use crate::db::tests::TestDb; use crate::semantic_index::definition::Definition; use crate::semantic_index::semantic_index; @@ -819,6 +832,23 @@ mod tests { db } + fn setup_db_with_custom_typeshed(typeshed: &str) -> TestDb { + let db = TestDb::new(); + + Program::new( + &db, + TargetVersion::Py38, + SearchPathSettings { + extra_paths: Vec::new(), + workspace_root: SystemPathBuf::from("/src"), + site_packages: None, + custom_typeshed: Some(SystemPathBuf::from(typeshed)), + }, + ); + + db + } + fn assert_public_ty(db: &TestDb, file_name: &str, symbol_name: &str, expected: &str) { let file = system_path_to_file(db, file_name).expect("Expected file to exist."); @@ -1370,6 +1400,80 @@ mod tests { Ok(()) } + #[test] + fn builtin_symbol_vendored_stdlib() -> anyhow::Result<()> { + let mut db = setup_db(); + + db.write_file("/src/a.py", "c = copyright")?; + + assert_public_ty(&db, "/src/a.py", "c", "Literal[copyright]"); + + Ok(()) + } + + #[test] + fn builtin_symbol_custom_stdlib() -> anyhow::Result<()> { + let mut db = setup_db_with_custom_typeshed("/typeshed"); + + db.write_files([ + ("/src/a.py", "c = copyright"), + ( + "/typeshed/stdlib/builtins.pyi", + "def copyright() -> None: ...", + ), + ("/typeshed/stdlib/VERSIONS", "builtins: 3.8-"), + ])?; + + assert_public_ty(&db, "/src/a.py", "c", "Literal[copyright]"); + + Ok(()) + } + + #[test] + fn unknown_global_later_defined() -> anyhow::Result<()> { + let mut db = setup_db(); + + db.write_file("/src/a.py", "x = foo; foo = 1")?; + + assert_public_ty(&db, "/src/a.py", "x", "Unbound"); + + Ok(()) + } + + #[test] + fn unknown_builtin_later_defined() -> anyhow::Result<()> { + let mut db = setup_db_with_custom_typeshed("/typeshed"); + + db.write_files([ + ("/src/a.py", "x = foo"), + ("/typeshed/stdlib/builtins.pyi", "foo = bar; bar = 1"), + ("/typeshed/stdlib/VERSIONS", "builtins: 3.8-"), + ])?; + + assert_public_ty(&db, "/src/a.py", "x", "Unbound"); + + Ok(()) + } + + #[test] + fn import_builtins() -> anyhow::Result<()> { + let mut db = setup_db(); + + db.write_file("/src/a.py", "import builtins; x = builtins.copyright")?; + + assert_public_ty(&db, "/src/a.py", "x", "Literal[copyright]"); + // imported builtins module is the same file as the implicit builtins + let file = system_path_to_file(&db, "/src/a.py").expect("Expected file to exist."); + let builtins_ty = global_symbol_ty_by_name(&db, file, "builtins"); + let Type::Module(builtins_file) = builtins_ty else { + panic!("Builtins are not a module?"); + }; + let implicit_builtins_file = builtins_scope(&db).expect("builtins to exist").file(&db); + assert_eq!(builtins_file, implicit_builtins_file); + + Ok(()) + } + fn first_public_def<'db>(db: &'db TestDb, file: File, name: &str) -> Definition<'db> { let scope = global_scope(db, file); *use_def_map(db, scope)