From 413bbab3d3a763a1abd600e3e8cf789936cfec74 Mon Sep 17 00:00:00 2001 From: epgif Date: Fri, 6 Jun 2025 19:32:14 -0500 Subject: [PATCH 1/3] feat: add SchemaProvider::table_type(table_name: &str) InformationSchemaConfig::make_tables only needs the TableType not the whole TableProvider, and the former may require an expensive catalog operation to construct and the latter may not. This allows avoiding `SELECT * FROM information_schema.tables` having to make 1 of those potentially expensive operations per table. --- datafusion/catalog/src/information_schema.rs | 6 ++++-- datafusion/catalog/src/schema.rs | 9 +++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index 057d1a819882..bd58d2e33f5b 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -103,12 +103,14 @@ impl InformationSchemaConfig { // schema name may not exist in the catalog, so we need to check if let Some(schema) = catalog.schema(&schema_name) { for table_name in schema.table_names() { - if let Some(table) = schema.table(&table_name).await? { + if let Some(table_type) = + schema.table_type(&table_name).await? + { builder.add_table( &catalog_name, &schema_name, &table_name, - table.table_type(), + table_type, ); } } diff --git a/datafusion/catalog/src/schema.rs b/datafusion/catalog/src/schema.rs index 5b37348fd742..9ba55256f182 100644 --- a/datafusion/catalog/src/schema.rs +++ b/datafusion/catalog/src/schema.rs @@ -26,6 +26,7 @@ use std::sync::Arc; use crate::table::TableProvider; use datafusion_common::Result; +use datafusion_expr::TableType; /// Represents a schema, comprising a number of named tables. /// @@ -54,6 +55,14 @@ pub trait SchemaProvider: Debug + Sync + Send { name: &str, ) -> Result>, DataFusionError>; + /// Retrieves the type of a specific table from the schema by name, if it exists, otherwise + /// returns `None`. Implementations for which this operation is cheap but [Self::table] is + /// expensive can override this to improve operations that only need the type, e.g. + /// `SELECT * FROM information_schema.tables`. + async fn table_type(&self, name: &str) -> Result> { + self.table(name).await.map(|o| o.map(|t| t.table_type())) + } + /// If supported by the implementation, adds a new table named `name` to /// this schema. /// From 50e2ecb58271540ec64dfcba7705d08829a005ad Mon Sep 17 00:00:00 2001 From: epgif Date: Tue, 17 Jun 2025 17:59:28 -0500 Subject: [PATCH 2/3] test: new InformationSchemaConfig::make_tables behavior --- datafusion/catalog/src/information_schema.rs | 3 + .../catalog/src/information_schema/tests.rs | 88 +++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100644 datafusion/catalog/src/information_schema/tests.rs diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index bd58d2e33f5b..e8a1e04caef2 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -1361,3 +1361,6 @@ impl PartitionStream for InformationSchemaParameters { )) } } + +#[cfg(test)] +mod tests; diff --git a/datafusion/catalog/src/information_schema/tests.rs b/datafusion/catalog/src/information_schema/tests.rs new file mode 100644 index 000000000000..045fa54a7533 --- /dev/null +++ b/datafusion/catalog/src/information_schema/tests.rs @@ -0,0 +1,88 @@ +use std::sync::Arc; + +use crate::CatalogProvider; + +use super::*; + +#[tokio::test] +async fn make_tables_uses_table_type() { + let config = InformationSchemaConfig { + catalog_list: Arc::new(Fixture), + }; + let mut builder = InformationSchemaTablesBuilder { + catalog_names: StringBuilder::new(), + schema_names: StringBuilder::new(), + table_names: StringBuilder::new(), + table_types: StringBuilder::new(), + schema: Arc::new(Schema::empty()), + }; + + assert!(config.make_tables(&mut builder).await.is_ok()); + + assert_eq!("BASE TABLE", builder.table_types.finish().value(0)); +} + +#[derive(Debug)] +struct Fixture; + +#[async_trait] +impl SchemaProvider for Fixture { + // InformationSchemaConfig::make_tables should use this. + async fn table_type(&self, _: &str) -> Result> { + Ok(Some(TableType::Base)) + } + + // InformationSchemaConfig::make_tables used this before `table_type` + // existed but should not, as it may be expensive. + async fn table(&self, _: &str) -> Result>> { + panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") + } + + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn table_names(&self) -> Vec { + vec!["atable".to_string()] + } + + fn table_exist(&self, _: &str) -> bool { + unimplemented!("not required for these tests") + } +} + +impl CatalogProviderList for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn register_catalog( + &self, + _: String, + _: Arc, + ) -> Option> { + unimplemented!("not required for these tests") + } + + fn catalog_names(&self) -> Vec { + vec!["acatalog".to_string()] + } + + fn catalog(&self, _: &str) -> Option> { + Some(Arc::new(Self)) + } +} + +impl CatalogProvider for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn schema_names(&self) -> Vec { + vec!["aschema".to_string()] + } + + fn schema(&self, _: &str) -> Option> { + Some(Arc::new(Self)) + } +} From 2bc5e3acb69f085c4e8c844aca6318a3765e73a1 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 18 Jun 2025 16:10:50 -0400 Subject: [PATCH 3/3] Move tests to same file to fix CI --- datafusion/catalog/src/information_schema.rs | 88 ++++++++++++++++++- .../catalog/src/information_schema/tests.rs | 88 ------------------- 2 files changed, 87 insertions(+), 89 deletions(-) delete mode 100644 datafusion/catalog/src/information_schema/tests.rs diff --git a/datafusion/catalog/src/information_schema.rs b/datafusion/catalog/src/information_schema.rs index e8a1e04caef2..83b6d64ef47b 100644 --- a/datafusion/catalog/src/information_schema.rs +++ b/datafusion/catalog/src/information_schema.rs @@ -1363,4 +1363,90 @@ impl PartitionStream for InformationSchemaParameters { } #[cfg(test)] -mod tests; +mod tests { + use super::*; + use crate::CatalogProvider; + + #[tokio::test] + async fn make_tables_uses_table_type() { + let config = InformationSchemaConfig { + catalog_list: Arc::new(Fixture), + }; + let mut builder = InformationSchemaTablesBuilder { + catalog_names: StringBuilder::new(), + schema_names: StringBuilder::new(), + table_names: StringBuilder::new(), + table_types: StringBuilder::new(), + schema: Arc::new(Schema::empty()), + }; + + assert!(config.make_tables(&mut builder).await.is_ok()); + + assert_eq!("BASE TABLE", builder.table_types.finish().value(0)); + } + + #[derive(Debug)] + struct Fixture; + + #[async_trait] + impl SchemaProvider for Fixture { + // InformationSchemaConfig::make_tables should use this. + async fn table_type(&self, _: &str) -> Result> { + Ok(Some(TableType::Base)) + } + + // InformationSchemaConfig::make_tables used this before `table_type` + // existed but should not, as it may be expensive. + async fn table(&self, _: &str) -> Result>> { + panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") + } + + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn table_names(&self) -> Vec { + vec!["atable".to_string()] + } + + fn table_exist(&self, _: &str) -> bool { + unimplemented!("not required for these tests") + } + } + + impl CatalogProviderList for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn register_catalog( + &self, + _: String, + _: Arc, + ) -> Option> { + unimplemented!("not required for these tests") + } + + fn catalog_names(&self) -> Vec { + vec!["acatalog".to_string()] + } + + fn catalog(&self, _: &str) -> Option> { + Some(Arc::new(Self)) + } + } + + impl CatalogProvider for Fixture { + fn as_any(&self) -> &dyn Any { + unimplemented!("not required for these tests") + } + + fn schema_names(&self) -> Vec { + vec!["aschema".to_string()] + } + + fn schema(&self, _: &str) -> Option> { + Some(Arc::new(Self)) + } + } +} diff --git a/datafusion/catalog/src/information_schema/tests.rs b/datafusion/catalog/src/information_schema/tests.rs deleted file mode 100644 index 045fa54a7533..000000000000 --- a/datafusion/catalog/src/information_schema/tests.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::sync::Arc; - -use crate::CatalogProvider; - -use super::*; - -#[tokio::test] -async fn make_tables_uses_table_type() { - let config = InformationSchemaConfig { - catalog_list: Arc::new(Fixture), - }; - let mut builder = InformationSchemaTablesBuilder { - catalog_names: StringBuilder::new(), - schema_names: StringBuilder::new(), - table_names: StringBuilder::new(), - table_types: StringBuilder::new(), - schema: Arc::new(Schema::empty()), - }; - - assert!(config.make_tables(&mut builder).await.is_ok()); - - assert_eq!("BASE TABLE", builder.table_types.finish().value(0)); -} - -#[derive(Debug)] -struct Fixture; - -#[async_trait] -impl SchemaProvider for Fixture { - // InformationSchemaConfig::make_tables should use this. - async fn table_type(&self, _: &str) -> Result> { - Ok(Some(TableType::Base)) - } - - // InformationSchemaConfig::make_tables used this before `table_type` - // existed but should not, as it may be expensive. - async fn table(&self, _: &str) -> Result>> { - panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type") - } - - fn as_any(&self) -> &dyn Any { - unimplemented!("not required for these tests") - } - - fn table_names(&self) -> Vec { - vec!["atable".to_string()] - } - - fn table_exist(&self, _: &str) -> bool { - unimplemented!("not required for these tests") - } -} - -impl CatalogProviderList for Fixture { - fn as_any(&self) -> &dyn Any { - unimplemented!("not required for these tests") - } - - fn register_catalog( - &self, - _: String, - _: Arc, - ) -> Option> { - unimplemented!("not required for these tests") - } - - fn catalog_names(&self) -> Vec { - vec!["acatalog".to_string()] - } - - fn catalog(&self, _: &str) -> Option> { - Some(Arc::new(Self)) - } -} - -impl CatalogProvider for Fixture { - fn as_any(&self) -> &dyn Any { - unimplemented!("not required for these tests") - } - - fn schema_names(&self) -> Vec { - vec!["aschema".to_string()] - } - - fn schema(&self, _: &str) -> Option> { - Some(Arc::new(Self)) - } -}