Skip to content

Commit

Permalink
[red-knot] gather type prevalence statistics (#15834)
Browse files Browse the repository at this point in the history
Something Alex and I threw together during our 1:1 this morning. Allows
us to collect statistics on the prevalence of various types in a file,
most usefully TODO types or other dynamic types.
  • Loading branch information
carljm authored Jan 31, 2025
1 parent 44ac17b commit ce769f6
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 0 deletions.
1 change: 1 addition & 0 deletions crates/red_knot_python_semantic/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ mod mro;
mod narrow;
mod signatures;
mod slots;
mod statistics;
mod string_annotation;
mod subclass_of;
mod type_ordering;
Expand Down
9 changes: 9 additions & 0 deletions crates/red_knot_python_semantic/src/types/infer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ use crate::types::diagnostic::{
UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_IMPORT, UNSUPPORTED_OPERATOR,
};
use crate::types::mro::MroErrorKind;
use crate::types::statistics::TypeStatistics;
use crate::types::unpacker::{UnpackResult, Unpacker};
use crate::types::{
builtins_symbol, global_symbol, symbol, symbol_from_bindings, symbol_from_declarations,
Expand Down Expand Up @@ -299,6 +300,14 @@ impl<'db> TypeInference<'db> {
self.diagnostics.shrink_to_fit();
self.deferred.shrink_to_fit();
}

/// Tally the types stored in `self.expressions`.
///
/// Every value in the map contributes one increment to the count recorded
/// for its type in the returned [`TypeStatistics`].
pub(super) fn statistics(&self) -> TypeStatistics {
    self.expressions
        .values()
        .fold(TypeStatistics::default(), |mut tally, expression_ty| {
            tally.increment(*expression_ty);
            tally
        })
}
}

impl WithDiagnostics for TypeInference<'_> {
Expand Down
121 changes: 121 additions & 0 deletions crates/red_knot_python_semantic/src/types/statistics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
use crate::types::{infer_scope_types, semantic_index, Type};
use crate::Db;
use ruff_db::files::File;
use rustc_hash::FxHashMap;

/// Compute type-prevalence statistics for `file`.
///
/// Walks every scope id in the file's semantic index, runs type inference on
/// that scope, and merges the per-scope counts into one [`TypeStatistics`].
#[salsa::tracked(return_ref)]
pub fn type_statistics<'db>(db: &'db dyn Db, file: File) -> TypeStatistics<'db> {
    let _span = tracing::trace_span!("type_statistics", file=?file.path(db)).entered();

    tracing::debug!(
        "Gathering statistics for file '{path}'",
        path = file.path(db)
    );

    semantic_index(db, file)
        .scope_ids()
        .fold(TypeStatistics::default(), |mut totals, scope| {
            // Merge this scope's counts into the running per-file totals.
            let scope_stats = infer_scope_types(db, scope).statistics();
            totals.extend(&scope_stats);
            totals
        })
}

/// Per-type expression counts: each key is a type, each value is the number
/// of expressions recorded with that type.
#[derive(Debug, Default, Eq, PartialEq)]
pub(super) struct TypeStatistics<'db>(FxHashMap<Type<'db>, u32>);

impl<'db> TypeStatistics<'db> {
    /// Add every count from `other` onto the matching entry in `self`,
    /// creating the entry when `self` has not seen that type yet.
    fn extend(&mut self, other: &TypeStatistics<'db>) {
        for (ty, count) in &other.0 {
            *self.0.entry(*ty).or_default() += count;
        }
    }

    /// Record one more expression of type `ty`.
    pub(super) fn increment(&mut self, ty: Type<'db>) {
        *self.0.entry(ty).or_default() += 1;
    }

    /// Total number of recorded expressions, across all types.
    #[allow(unused)]
    fn expression_count(&self) -> u32 {
        self.0.values().copied().sum()
    }

    /// Number of recorded expressions whose type reports `is_todo()`.
    #[allow(unused)]
    fn todo_count(&self) -> u32 {
        self.0
            .iter()
            .filter_map(|(ty, count)| ty.is_todo().then_some(*count))
            .sum()
    }
}

// Unit tests for `type_statistics`, run against an in-memory test database.
#[cfg(test)]
mod tests {
use super::*;
use crate::db::tests::{setup_db, TestDb};
use ruff_db::files::system_path_to_file;
use ruff_db::system::DbWithTestSystem;

/// Write `source` to `filename` in the test database (via `write_dedented`)
/// and return the statistics computed by `type_statistics` for that file.
fn get_stats<'db>(
db: &'db mut TestDb,
filename: &str,
source: &str,
) -> &'db TypeStatistics<'db> {
db.write_dedented(filename, source).unwrap();

type_statistics(db, system_path_to_file(db, filename).unwrap())
}

/// A file containing only the literal `1` yields exactly one entry:
/// `Type::IntLiteral(1)` with a count of 1.
#[test]
fn all_static() {
let mut db = setup_db();

let stats = get_stats(&mut db, "src/foo.py", "1");

assert_eq!(stats.0, FxHashMap::from_iter([(Type::IntLiteral(1), 1)]));
}

/// Checks `todo_count` and `expression_count` on an assignment from a list
/// comprehension: 4 of the 6 recorded expressions are expected to be TODO types.
#[test]
fn todo_and_expression_count() {
let mut db = setup_db();

let stats = get_stats(
&mut db,
"src/foo.py",
r#"
x = [x for x in [1]]
"#,
);

assert_eq!(stats.todo_count(), 4);
assert_eq!(stats.expression_count(), 6);
}

/// Checks that counts for the same type are added together: the fixture
/// contains two `1` literals and the expected count for `IntLiteral(1)` is 2.
// NOTE(review): presumably the second `1` is the body of `f`, so the two
// literals live in different scopes -- confirm against the fixture's indentation.
#[test]
fn sum() {
let mut db = setup_db();

let stats = get_stats(
&mut db,
"src/foo.py",
r#"
1
def f():
1
"#,
);

assert_eq!(stats.0[&Type::IntLiteral(1)], 2);
}
}

0 comments on commit ce769f6

Please sign in to comment.