Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

red-knot[salsa part 2]: Setup semantic DB and Jar #11837

Merged
merged 5 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 7 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ license = "MIT"
[workspace.dependencies]
ruff = { path = "crates/ruff" }
ruff_cache = { path = "crates/ruff_cache" }
ruff_db = { path = "crates/ruff_db" }
ruff_diagnostics = { path = "crates/ruff_diagnostics" }
ruff_formatter = { path = "crates/ruff_formatter" }
ruff_index = { path = "crates/ruff_index" }
Expand Down Expand Up @@ -81,7 +82,7 @@ libcst = { version = "1.1.0", default-features = false }
log = { version = "0.4.17" }
lsp-server = { version = "0.7.6" }
lsp-types = { git = "https://github.com/astral-sh/lsp-types.git", rev = "3512a9f", features = [
"proposed",
"proposed",
] }
matchit = { version = "0.8.1" }
memchr = { version = "2.7.1" }
Expand Down Expand Up @@ -111,7 +112,7 @@ serde-wasm-bindgen = { version = "0.6.4" }
serde_json = { version = "1.0.113" }
serde_test = { version = "1.0.152" }
serde_with = { version = "3.6.0", default-features = false, features = [
"macros",
"macros",
] }
shellexpand = { version = "3.0.0" }
similar = { version = "2.4.0", features = ["inline"] }
Expand All @@ -138,10 +139,10 @@ unicode-normalization = { version = "0.1.23" }
ureq = { version = "2.9.6" }
url = { version = "2.5.0" }
uuid = { version = "1.6.1", features = [
"v4",
"fast-rng",
"macro-diagnostics",
"js",
"v4",
"fast-rng",
"macro-diagnostics",
"js",
] }
walkdir = { version = "2.3.2" }
wasm-bindgen = { version = "0.2.92" }
Expand Down
36 changes: 27 additions & 9 deletions crates/ruff_db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,21 +50,22 @@ mod tests {
use crate::file_system::{FileSystem, MemoryFileSystem};
use crate::vfs::{VendoredPathBuf, Vfs};
use crate::{Db, Jar};
use salsa::DebugWithDb;
use std::sync::Arc;

/// Database that can be used for testing.
///
/// Uses an in memory filesystem and it stubs out the vendored files by default.
#[salsa::db(Jar)]
pub struct TestDb {
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: MemoryFileSystem,
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}

impl TestDb {
#[allow(unused)]
pub fn new() -> Self {
pub(crate) fn new() -> Self {
let mut vfs = Vfs::default();
vfs.stub_vendored::<VendoredPathBuf, String>([]);

Expand All @@ -77,20 +78,37 @@ mod tests {
}

#[allow(unused)]
pub fn file_system(&self) -> &MemoryFileSystem {
pub(crate) fn file_system(&self) -> &MemoryFileSystem {
&self.file_system
}

/// Empties the internal store of salsa events that have been emitted,
/// and returns them as a `Vec` (equivalent to [`std::mem::take`]).
///
/// ## Panics
/// If there are pending database snapshots.
#[allow(unused)]
pub(crate) fn take_salsa_events(&mut self) -> Vec<salsa::Event> {
let inner = Arc::get_mut(&mut self.events)
.expect("expected no pending salsa database snapshots.");

std::mem::take(inner.get_mut().unwrap())
}

/// Clears the emitted salsa events.
///
/// ## Panics
/// If there are pending database snapshots.
#[allow(unused)]
pub fn events(&self) -> std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>> {
self.events.clone()
pub(crate) fn clear_salsa_events(&mut self) {
self.take_salsa_events();
}

pub fn file_system_mut(&mut self) -> &mut MemoryFileSystem {
pub(crate) fn file_system_mut(&mut self) -> &mut MemoryFileSystem {
&mut self.file_system
}

pub fn vfs_mut(&mut self) -> &mut Vfs {
pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
&mut self.vfs
}
}
Expand All @@ -107,7 +125,7 @@ mod tests {

impl salsa::Database for TestDb {
fn salsa_event(&self, event: salsa::Event) {
tracing::trace!("event: {:?}", event);
tracing::trace!("event: {:?}", event.debug(self));
let mut events = self.events.lock().unwrap();
events.push(event);
}
Expand Down
128 changes: 128 additions & 0 deletions crates/ruff_db/src/source.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
use std::ops::Deref;
use std::sync::Arc;

use ruff_source_file::LineIndex;

use crate::vfs::VfsFile;
use crate::Db;

/// Reads the content of `file` through `db` and wraps it in a [`SourceText`].
///
/// The value returned by `file.read(db)` is converted into an `Arc<str>`, which
/// is what makes the resulting [`SourceText`] cheap to clone.
#[salsa::tracked]
pub fn source_text(db: &dyn Db, file: VfsFile) -> SourceText {
    let inner: Arc<str> = Arc::from(file.read(db));

    SourceText { inner }
}

/// Builds the [`LineIndex`] for the source text of `file`.
///
/// The text is obtained via [`source_text`] and handed to
/// [`LineIndex::from_source_text`].
#[salsa::tracked]
pub fn line_index(db: &dyn Db, file: VfsFile) -> LineIndex {
    LineIndex::from_source_text(&source_text(db, file))
}

/// The source text of a [`VfsFile`].
///
/// Cheaply cloneable in `O(1)`: the text lives behind an `Arc<str>`, so a clone
/// only creates another handle to the same string.
#[derive(Clone, Eq, PartialEq)]
pub struct SourceText {
    inner: Arc<str>,
}

impl SourceText {
    /// Returns the underlying text as a string slice.
    pub fn as_str(&self) -> &str {
        let text: &str = &self.inner;
        text
    }
}

/// Lets a `SourceText` be used wherever a `&str` is expected.
impl Deref for SourceText {
    type Target = str;

    fn deref(&self) -> &str {
        &self.inner
    }
}

/// Formats as a tuple struct named `SourceText` wrapping the text.
impl std::fmt::Debug for SourceText {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("SourceText");
        tuple.field(&self.inner);
        tuple.finish()
    }
}

#[cfg(test)]
mod tests {
use filetime::FileTime;
use salsa::EventKind;

use ruff_source_file::OneIndexed;
use ruff_text_size::TextSize;

use crate::file_system::FileSystemPath;
use crate::source::{line_index, source_text};
use crate::tests::TestDb;
use crate::Db;

#[test]
fn re_runs_query_when_file_revision_changes() {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");

db.file_system_mut().write_file(path, "x = 10".to_string());

let file = db.file(path);

assert_eq!(&*source_text(&db, file), "x = 10");

db.file_system_mut().write_file(path, "x = 20".to_string());
file.set_revision(&mut db).to(FileTime::now().into());
Comment on lines +79 to +80
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's surprising to me that we have to manually update the revision here; should write_file do that itself?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The FileSystem that I have in mind should be independent of the vfs and the salsa db. It's just an abstraction over std::fs. To avoid the set_revision call here, what I have in mind is an apply_changes method that takes all file changes (added, removed, modified) and updates the vfs state accordingly. It's probably the FileSystem's or the host's responsibility to collect all the changes that were made.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That makes sense, and would address my concern: I don't want to expose/use APIs that make it easy to change a file's contents but forget to bump its revision.

I think this also gets back to what I find confusing about the Vfs and FileSystem structure, which is that there is no single entity which really encapsulates our virtual file system. Where does this apply_changes method live? I guess it is just a free function that takes a Db and some changes to apply?

Probably this is just something I have to get used to with Salsa, that the Db owns all the state, so most API that we use for managing state is just a function that takes a Db.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Where does this apply_changes method live? I guess it is just a free function that takes a Db and some changes to apply?

It would probably be a free-standing function that takes &mut Db. And yes, the fact that it is a free-standing function takes some time to get used to. An alternative is to make it a method on Vfs, but I think that won't work because it would require holding a mutable reference to Db and a read-only reference to FileSystem at the same time.

> Probably this is just something I have to get used to with Salsa, that the Db owns all the state, so most API that we use for managing state is just a function that takes a Db.

Yes, that takes some time to get used to. It's also something I'm still figuring out.


assert_eq!(&*source_text(&db, file), "x = 20");
}

/// Changing only a file's permissions must not cause `source_text` to execute again.
#[test]
fn text_is_cached_if_revision_is_unchanged() {
    let mut db = TestDb::new();
    let path = FileSystemPath::new("test.py");

    db.file_system_mut().write_file(path, "x = 10".to_string());

    let file = db.file(path);

    assert_eq!(&*source_text(&db, file), "x = 10");

    // Change the file permission only
    file.set_permissions(&mut db).to(Some(0o777));

    // Drop everything recorded so far so that only events emitted by the
    // second `source_text` call are inspected below.
    db.clear_salsa_events();
    assert_eq!(&*source_text(&db, file), "x = 10");

    let events = db.take_salsa_events();

    // No `WillExecute` event means no query was executed for the second call.
    assert!(!events
        .iter()
        .any(|event| matches!(event.kind, EventKind::WillExecute { .. })));
}

/// A two-line file yields a two-line index whose first line starts at offset 0.
#[test]
fn line_index_for_source() {
    let mut db = TestDb::new();
    let path = FileSystemPath::new("test.py");
    let contents = "x = 10\ny = 20".to_string();

    db.file_system_mut().write_file(path, contents);

    let file = db.file(path);
    let index = line_index(&db, file);
    let text = source_text(&db, file);

    let first_line = OneIndexed::from_zero_indexed(0);

    assert_eq!(index.line_count(), 2);
    assert_eq!(index.line_start(first_line, &text), TextSize::new(0));
}
}
6 changes: 6 additions & 0 deletions crates/ruff_python_semantic/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ license = { workspace = true }
doctest = false

[dependencies]
ruff_db = { workspace = true, optional = true }
ruff_index = { workspace = true }
ruff_python_ast = { workspace = true }
ruff_python_stdlib = { workspace = true }
Expand All @@ -22,10 +23,15 @@ ruff_text_size = { workspace = true }

bitflags = { workspace = true }
is-macro = { workspace = true }
salsa = { workspace = true, optional = true }
tracing = { workspace = true, optional = true }
rustc-hash = { workspace = true }

[dev-dependencies]
ruff_python_parser = { workspace = true }

[lints]
workspace = true

[features]
red_knot = ["dep:ruff_db", "dep:salsa", "dep:tracing"]
91 changes: 91 additions & 0 deletions crates/ruff_python_semantic/src/db.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
use ruff_db::{Db as SourceDb, Upcast};
use salsa::DbWithJar;

// Salsa jar of this crate, tied to the `Db` trait through `db=Db`.
// Nothing is listed inside `Jar()` yet.
// Salsa doesn't support a struct without fields, so allow the clippy lint for now.
#[allow(clippy::empty_structs_with_brackets)]
#[salsa::jar(db=Db)]
pub struct Jar();

/// Database giving access to semantic information about a Python program.
///
/// The trait declares no methods of its own. Implementors must also implement
/// `SourceDb` (the `Db` trait of `ruff_db`), `DbWithJar<Jar>` for this crate's
/// `Jar`, and `Upcast<dyn SourceDb>`.
pub trait Db: SourceDb + DbWithJar<Jar> + Upcast<dyn SourceDb> {}

#[cfg(test)]
mod tests {
use super::{Db, Jar};
use ruff_db::file_system::{FileSystem, MemoryFileSystem};
use ruff_db::vfs::Vfs;
use ruff_db::{Db as SourceDb, Jar as SourceJar, Upcast};
use salsa::DebugWithDb;

/// Database used by the tests of this crate.
///
/// Registers both this crate's `Jar` and `ruff_db`'s `SourceJar`, and keeps its
/// files in a [`MemoryFileSystem`].
#[salsa::db(Jar, SourceJar)]
pub(crate) struct TestDb {
storage: salsa::Storage<Self>,
vfs: Vfs,
file_system: MemoryFileSystem,
// Every event passed to `salsa_event` is appended here; snapshots share the same `Arc`.
events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}

impl TestDb {
    /// Creates a database with default salsa storage, a default
    /// [`MemoryFileSystem`], an empty event store, and the [`Vfs`] returned by
    /// `Vfs::with_stubbed_vendored`.
    #[allow(unused)]
    pub(crate) fn new() -> Self {
        let storage = salsa::Storage::default();
        let file_system = MemoryFileSystem::default();
        let events = std::sync::Arc::default();
        let vfs = Vfs::with_stubbed_vendored();

        Self {
            storage,
            vfs,
            file_system,
            events,
        }
    }

    /// Shared access to the concrete in-memory file system.
    #[allow(unused)]
    pub(crate) fn memory_file_system(&self) -> &MemoryFileSystem {
        &self.file_system
    }

    /// Exclusive access to the concrete in-memory file system.
    #[allow(unused)]
    pub(crate) fn memory_file_system_mut(&mut self) -> &mut MemoryFileSystem {
        &mut self.file_system
    }

    /// Exclusive access to the database's [`Vfs`].
    #[allow(unused)]
    pub(crate) fn vfs_mut(&mut self) -> &mut Vfs {
        &mut self.vfs
    }
}

// Exposes the test database's in-memory file system and `Vfs` through the `SourceDb` trait.
impl SourceDb for TestDb {
// Returns the `MemoryFileSystem` field as a `FileSystem` trait object.
fn file_system(&self) -> &dyn FileSystem {
&self.file_system
}

// Returns a shared reference to the `Vfs` field.
fn vfs(&self) -> &Vfs {
&self.vfs
}
}

// Allows a `TestDb` to be viewed as a `dyn SourceDb` trait object.
impl Upcast<dyn SourceDb> for TestDb {
// Returns `self` as the `dyn SourceDb` trait object.
fn upcast(&self) -> &(dyn SourceDb + 'static) {
self
}
}

// `Db` declares no methods, so an empty impl is all that is needed here.
impl Db for TestDb {}

impl salsa::Database for TestDb {
    /// Logs `event` at trace level and appends it to the database's event store.
    fn salsa_event(&self, event: salsa::Event) {
        tracing::trace!("event: {:?}", event.debug(self));
        self.events.lock().unwrap().push(event);
    }
}

impl salsa::ParallelDatabase for TestDb {
    /// Creates a snapshot of the database.
    ///
    /// The storage, `Vfs` and file system are snapshotted through their own
    /// `snapshot` methods; the event store is shared with the snapshot by
    /// cloning its `Arc`.
    fn snapshot(&self) -> salsa::Snapshot<Self> {
        let storage = self.storage.snapshot();
        let vfs = self.vfs.snapshot();
        let file_system = self.file_system.snapshot();
        let events = self.events.clone();

        salsa::Snapshot::new(Self {
            storage,
            vfs,
            file_system,
            events,
        })
    }
}
}
5 changes: 5 additions & 0 deletions crates/ruff_python_semantic/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ pub mod analyze;
mod binding;
mod branches;
mod context;
#[cfg(feature = "red_knot")]
mod db;
mod definition;
mod globals;
mod model;
Expand All @@ -20,3 +22,6 @@ pub use nodes::*;
pub use reference::*;
pub use scope::*;
pub use star_import::*;

#[cfg(feature = "red_knot")]
pub use db::{Db, Jar};
Loading