From d825c12d84f913f8913c89efd15e734c60a9cf65 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 26 Sep 2024 23:43:42 -0700 Subject: [PATCH 1/9] Make support schemas --- datafusion/core/src/catalog_common/mod.rs | 7 ++++--- datafusion/sql/src/statement.rs | 9 +++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs index b8414378862e..78b9ddff3611 100644 --- a/datafusion/core/src/catalog_common/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -185,9 +185,8 @@ pub fn resolve_table_references( let _ = s.as_ref().visit(visitor); } DFStatement::CreateExternalTable(table) => { - visitor - .relations - .insert(ObjectName(vec![Ident::from(table.name.as_str())])); + let idents: Vec = table.name.split('.').map(Ident::from).collect(); + visitor.relations.insert(ObjectName(idents)); } DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { CopyToSource::Relation(table_name) => { @@ -213,6 +212,8 @@ pub fn resolve_table_references( .into_iter() .map(|x| object_name_to_table_reference(x, enable_ident_normalization)) .collect::>()?; + println!("Table refs: {:?}", table_refs); + println!("ctes : {:?}", ctes); Ok((table_refs, ctes)) } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 29dfe25993f1..22f989fed4c1 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -25,7 +25,8 @@ use crate::parser::{ LexOrdering, Statement as DFStatement, }; use crate::planner::{ - object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel, + object_name_to_qualifier, object_name_to_table_reference, ContextProvider, + PlannerContext, SqlToRel, }; use crate::utils::normalize_ident; @@ -1240,7 +1241,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.build_order_by(order_exprs, &df_schema, &mut planner_context)?; // External tables do not support schemas at the moment, so the name is just a table name - let name = TableReference::bare(name); + // + let idents: Vec = name.split('.').map(Ident::from).collect(); + let obj = ObjectName(idents); + let name = object_name_to_table_reference(obj, false)?; + // let name = TableReference::bare(name); let constraints = Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable( From 187151ff51002eb9f17a672d22bfe85ad71f7603 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Thu, 26 Sep 2024 23:52:59 -0700 Subject: [PATCH 2/9] Set default name to table --- datafusion/sql/src/statement.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 22f989fed4c1..b475bfad6ee7 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1244,7 +1244,8 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { // let idents: Vec = name.split('.').map(Ident::from).collect(); let obj = ObjectName(idents); - let name = object_name_to_table_reference(obj, false)?; + let name = object_name_to_table_reference(obj, false) + .unwrap_or(TableReference::bare(name)); // let name = TableReference::bare(name); let constraints = Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; From 170f239550491b116ae51cffde46215053bd2588 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Fri, 27 Sep 2024 00:08:35 -0700 Subject: [PATCH 3/9] Remove print statements and stale comment --- datafusion/core/src/catalog_common/mod.rs | 2 -- datafusion/sql/src/statement.rs | 6 +----- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs index 78b9ddff3611..e272894177a0 100644 --- a/datafusion/core/src/catalog_common/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -212,8 +212,6 @@ pub fn resolve_table_references( .into_iter() .map(|x| object_name_to_table_reference(x, enable_ident_normalization)) .collect::>()?; - println!("Table refs: {:?}", table_refs); - println!("ctes : {:?}", ctes); Ok((table_refs, ctes)) } diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index b475bfad6ee7..cd120b349117 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1240,13 +1240,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let ordered_exprs = self.build_order_by(order_exprs, &df_schema, &mut planner_context)?; - // External tables do not support schemas at the moment, so the name is just a table name - // let idents: Vec = name.split('.').map(Ident::from).collect(); - let obj = ObjectName(idents); - let name = object_name_to_table_reference(obj, false) + let name = object_name_to_table_reference(ObjectName(idents), false) .unwrap_or(TableReference::bare(name)); - // let name = TableReference::bare(name); let constraints = Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable( From 88e90d2e6cd0ecb3120ce357a900c5f870061453 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Sat, 28 Sep 2024 21:43:34 -0700 Subject: [PATCH 4/9] Add tests for create table --- datafusion/core/src/catalog_common/mod.rs | 3 +++ datafusion/sql/tests/sql_integration.rs | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs index e272894177a0..fa9a28e45613 100644 --- a/datafusion/core/src/catalog_common/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -187,6 +187,9 @@ pub fn resolve_table_references( DFStatement::CreateExternalTable(table) => { let idents: Vec = table.name.split('.').map(Ident::from).collect(); visitor.relations.insert(ObjectName(idents)); + visitor + .relations + .insert(ObjectName(vec![Ident::from(table.name.as_str())])); } DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { CopyToSource::Relation(table_name) => { diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 5c9655a55606..ca5cee740f2e 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() { quick_test(sql, expected); } +#[test] +fn create_external_table_wih_schema() { + let sql = "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 'foo.csv'"; + let expected = "CreateExternalTable: Partial { schema: \"staging\", table: \"foo\" }"; + quick_test(sql, expected); +} + #[test] fn create_schema_with_quoted_name() { let sql = "CREATE SCHEMA \"quoted_schema_name\""; @@ -1949,7 +1956,7 @@ fn create_external_table_csv_no_schema() { } #[test] -fn create_external_table_with_compression_type() { +fn create_external_table_with_compression_typet() { // positive case let sqls = vec![ "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv.gz' OPTIONS ('format.compression' 'gzip')", From b7576b9a6838f77c30431787477daa5018aa122b Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Sat, 28 Sep 2024 21:44:25 -0700 Subject: [PATCH 5/9] Fix typo --- datafusion/sql/tests/sql_integration.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index ca5cee740f2e..44b591fedef8 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -1956,7 +1956,7 @@ fn create_external_table_csv_no_schema() { } #[test] -fn create_external_table_with_compression_typet() { +fn create_external_table_with_compression_type() { // positive case let sqls = vec![ "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv.gz' OPTIONS ('format.compression' 'gzip')", From 2d207155b7a0c95c2d11ef14e03afb48e0776538 Mon Sep 17 00:00:00 2001 From: OussamaSaoudi <45303303+OussamaSaoudi@users.noreply.github.com> Date: Sun, 29 Sep 2024 15:18:25 -0400 Subject: [PATCH 6/9] Update datafusion/sql/src/statement.rs Co-authored-by: Jonah Gao --- datafusion/sql/src/statement.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index cd120b349117..2cea95647ca8 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -1241,8 +1241,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { self.build_order_by(order_exprs, &df_schema, &mut planner_context)?; let idents: Vec = name.split('.').map(Ident::from).collect(); - let name = object_name_to_table_reference(ObjectName(idents), false) - .unwrap_or(TableReference::bare(name)); + let name = self.object_name_to_table_reference(name)?; let constraints = Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable( From 57977efc517f0596b2e1a2be2efedec34f7b670e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Sun, 29 Sep 2024 23:25:08 -0700 Subject: [PATCH 7/9] convert create_external_table to objectname --- datafusion/core/src/catalog_common/mod.rs | 6 +----- datafusion/sql/src/parser.rs | 4 ++-- datafusion/sql/src/statement.rs | 4 +--- 3 files changed, 4 insertions(+), 10 deletions(-) diff --git a/datafusion/core/src/catalog_common/mod.rs b/datafusion/core/src/catalog_common/mod.rs index fa9a28e45613..85207845a005 100644 --- a/datafusion/core/src/catalog_common/mod.rs +++ b/datafusion/core/src/catalog_common/mod.rs @@ -185,11 +185,7 @@ pub fn resolve_table_references( let _ = s.as_ref().visit(visitor); } DFStatement::CreateExternalTable(table) => { - let idents: Vec = table.name.split('.').map(Ident::from).collect(); - visitor.relations.insert(ObjectName(idents)); - visitor - .relations - .insert(ObjectName(vec![Ident::from(table.name.as_str())])); + visitor.relations.insert(table.name.clone()); } DFStatement::CopyTo(CopyToStatement { source, .. }) => match source { CopyToSource::Relation(table_name) => { diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index 2df8d89c59bc..f7680aa4483d 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec; #[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateExternalTable { /// Table name - pub name: String, + pub name: ObjectName, /// Optional schema pub columns: Vec, /// File type (Parquet, NDJSON, CSV, etc) @@ -813,7 +813,7 @@ impl<'a> DFParser<'a> { } let create = CreateExternalTable { - name: table_name.to_string(), + name: table_name, columns, file_type: builder.file_type.unwrap(), location: builder.location.unwrap(), diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs index 2cea95647ca8..33d833c7b8f3 100644 --- a/datafusion/sql/src/statement.rs +++ b/datafusion/sql/src/statement.rs @@ -25,8 +25,7 @@ use crate::parser::{ LexOrdering, Statement as DFStatement, }; use crate::planner::{ - object_name_to_qualifier, object_name_to_table_reference, ContextProvider, - PlannerContext, SqlToRel, + object_name_to_qualifier, ContextProvider, PlannerContext, SqlToRel, }; use crate::utils::normalize_ident; @@ -1240,7 +1239,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { let ordered_exprs = self.build_order_by(order_exprs, &df_schema, &mut planner_context)?; - let idents: Vec = name.split('.').map(Ident::from).collect(); let name = self.object_name_to_table_reference(name)?; let constraints = Constraints::new_from_table_constraints(&all_constraints, &df_schema)?; From e00b5c896a27fd2d29da59eaedca6a08d4a41213 Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 30 Sep 2024 23:02:04 -0700 Subject: [PATCH 8/9] Add sqllogic tests --- .../test_files/create_external_table.slt | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index 12b097c3d5d1..9ac2ecdce7cc 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -275,3 +275,15 @@ DROP TABLE t; # query should fail with bad column statement error DataFusion error: Error during planning: Column foo is not in schema CREATE EXTERNAL TABLE t STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo); + +# Create external table with qualified name should belong to the schema +statement ok +CREATE SCHEMA staging; + +statement ok +CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet'; + +# Create external table with qualified name, but no schema should error +statement error DataFusion error: Error during planning: failed to resolve schema: release +CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION '../../parquet-testing/data/alltypes_plain.parquet'; + From b1e6e69f8ea07019f3423a33d65537009a4a3d4e Mon Sep 17 00:00:00 2001 From: Oussama Saoudi Date: Mon, 30 Sep 2024 23:54:09 -0700 Subject: [PATCH 9/9] Fix failing tests --- datafusion/sql/src/parser.rs | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs index f7680aa4483d..6d130647a49f 100644 --- a/datafusion/sql/src/parser.rs +++ b/datafusion/sql/src/parser.rs @@ -915,8 +915,9 @@ mod tests { // positive case let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'"; let display = None; + let name = ObjectName(vec![Ident::from("t")]); let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -932,7 +933,7 @@ mod tests { // positive case: leading space let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' "; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -949,7 +950,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -966,7 +967,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -986,7 +987,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1013,7 +1014,7 @@ mod tests { ]; for (sql, compression) in sqls { let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(display))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1033,7 +1034,7 @@ mod tests { // positive case: it is ok for parquet files not to have columns specified let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1049,7 +1050,7 @@ mod tests { // positive case: it is ok for parquet files to be other than upper case let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1065,7 +1066,7 @@ mod tests { // positive case: it is ok for avro files not to have columns specified let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "AVRO".to_string(), location: "foo.avro".into(), @@ -1082,7 +1083,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "PARQUET".to_string(), location: "foo.parquet".into(), @@ -1099,7 +1100,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(None)), make_column_def("p1", DataType::Int(None)), @@ -1132,7 +1133,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "X".to_string(), location: "blahblah".into(), @@ -1149,7 +1150,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![], file_type: "X".to_string(), location: "blahblah".into(), @@ -1188,7 +1189,7 @@ mod tests { ]; for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) { let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![make_column_def("c1", DataType::Int(None))], file_type: "CSV".to_string(), location: "foo.csv".into(), @@ -1214,7 +1215,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(display)), make_column_def("c2", DataType::Int(display)), @@ -1253,7 +1254,7 @@ mod tests { let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'"; let display = None; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(display)), make_column_def("c2", DataType::Int(display)), @@ -1297,7 +1298,7 @@ mod tests { 'TRUNCATE' 'NO', 'format.has_header' 'true')"; let expected = Statement::CreateExternalTable(CreateExternalTable { - name: "t".into(), + name: name.clone(), columns: vec![ make_column_def("c1", DataType::Int(None)), make_column_def("c2", DataType::Float(None)),