diff --git a/Cargo.lock b/Cargo.lock index f7c638228..23abbf47d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1146,21 +1146,6 @@ dependencies = [ "syn 2.0.48", ] -[[package]] -name = "bit-set" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" -dependencies = [ - "bit-vec", -] - -[[package]] -name = "bit-vec" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" - [[package]] name = "bitflags" version = "1.3.2" @@ -2496,6 +2481,7 @@ dependencies = [ "once_cell", "openssh", "parking_lot", + "parser", "protogen", "rand", "regex", @@ -4798,8 +4784,6 @@ dependencies = [ "object_store_util", "once_cell", "pgrepr", - "proptest", - "proptest-derive", "prost", "prost-types", "protogen", @@ -5555,6 +5539,17 @@ dependencies = [ "regex", ] +[[package]] +name = "parser" +version = "0.9.1" +dependencies = [ + "datafusion", + "datafusion_ext", + "protogen", + "prql-compiler", + "thiserror", +] + [[package]] name = "password-hash" version = "0.4.2" @@ -5679,6 +5674,7 @@ dependencies = [ "datafusion", "datafusion_ext", "futures", + "parser", "pgrepr", "proxyutil", "reqwest", @@ -5972,37 +5968,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "proptest" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" -dependencies = [ - "bit-set", - "bit-vec", - "bitflags 2.4.1", - "lazy_static", - "num-traits", - "rand", - "rand_chacha", - "rand_xorshift", - "regex-syntax 0.8.2", - "rusty-fork", - "tempfile", - "unarray", -] - -[[package]] -name = "proptest-derive" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cf16337405ca084e9c78985114633b6827711d22b9e6ef6c6c0d665eb3f0b6e" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "prost" version = "0.12.3" @@ -6063,8 +6028,6 @@ version = "0.9.1" dependencies = [ "datafusion", "datafusion-proto", - "proptest", - "proptest-derive", "prost", "prost-build", "prost-types", @@ -6390,15 +6353,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "rand_xorshift" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" -dependencies = [ - "rand_core", -] - [[package]] name = "rand_xoshiro" version = "0.6.0" @@ -7094,18 +7048,6 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" -[[package]] -name = "rusty-fork" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" -dependencies = [ - "fnv", - "quick-error", - "tempfile", - "wait-timeout", -] - [[package]] name = "ryu" version = "1.0.15" @@ -7864,11 +7806,11 @@ dependencies = [ "object_store_util", "once_cell", "parking_lot", + "parser", "pgrepr", "prost", "protogen", "proxyutil", - "prql-compiler", "reqwest", "serde", "sqlbuiltins", @@ -8908,12 +8850,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" -[[package]] -name = "unarray" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" - [[package]] name = "unicase" version = "2.7.0" diff --git a/crates/datasources/Cargo.toml b/crates/datasources/Cargo.toml index efeea7a56..501fb650a 100644 --- a/crates/datasources/Cargo.toml +++ b/crates/datasources/Cargo.toml @@ -25,6 +25,7 @@ futures = { workspace = true } gcp-bigquery-client = "0.18.0" klickhouse = { version = "0.11.2", features = ["tls"] } protogen = { path = "../protogen" } +parser = {path = "../parser"} datafusion_ext = { path = "../datafusion_ext" } mongodb = "2.8.1" mysql_async = { version = "0.33.0", default-features = false, features = [ diff --git a/crates/datasources/src/debug/mod.rs b/crates/datasources/src/debug/mod.rs index 2288aa8bd..68d9f503d 100644 --- a/crates/datasources/src/debug/mod.rs +++ b/crates/datasources/src/debug/mod.rs @@ -37,6 +37,8 @@ use datafusion_ext::errors::ExtensionError; use datafusion_ext::functions::VirtualLister; use errors::DebugError; use futures::Stream; +use parser::errors::ParserError; +use parser::options::{OptionValue, ParseOptionValue}; use protogen::metastore::types::options::TunnelOptions; use serde::{Deserialize, Serialize}; @@ -48,6 +50,22 @@ pub enum DebugTableType { NeverEnding, } +impl ParseOptionValue for OptionValue { + fn parse_opt(self) -> Result { + let opt = match self { + Self::QuotedLiteral(s) | Self::UnquotedLiteral(s) => s + .parse() + .map_err(|e: DebugError| ParserError::ParserError(e.to_string()))?, + o => { + return Err(ParserError::ParserError(format!( + "Expected a string, got: {}", + o + ))) + } + }; + Ok(opt) + } +} impl fmt::Display for DebugTableType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.as_str()) diff --git a/crates/datasources/src/mongodb/mod.rs b/crates/datasources/src/mongodb/mod.rs index 0a2c58140..b5cde4578 100644 --- a/crates/datasources/src/mongodb/mod.rs +++ b/crates/datasources/src/mongodb/mod.rs @@ -25,6 +25,9 @@ use mongodb::bson::spec::BinarySubtype; use mongodb::bson::{bson, Binary, Bson, Document, RawDocumentBuf}; use mongodb::options::{ClientOptions, FindOptions}; use mongodb::{Client, Collection}; +use parser::errors::ParserError; +use parser::options::{OptionValue, ParseOptionValue}; +use parser::{parser_err, unexpected_type_err}; use tracing::debug; use crate::bson::array_to_bson; @@ -66,6 +69,19 @@ impl FromStr for MongoDbProtocol { } } +impl ParseOptionValue for OptionValue { + fn parse_opt(self) -> Result { + let opt = match self { + Self::QuotedLiteral(s) | Self::UnquotedLiteral(s) => { + s.parse().map_err(|e| parser_err!("{e}"))? + } + o => return Err(unexpected_type_err!("mongodb protocol", o)), + }; + Ok(opt) + } +} + + impl Display for MongoDbProtocol { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let s = match self { diff --git a/crates/metastore/Cargo.toml b/crates/metastore/Cargo.toml index 8c8532c44..fbbd54c65 100644 --- a/crates/metastore/Cargo.toml +++ b/crates/metastore/Cargo.toml @@ -1,18 +1,18 @@ [package] name = "metastore" -version = {workspace = true} -edition = {workspace = true} +version = { workspace = true } +edition = { workspace = true } [lints] workspace = true [dependencies] -ioutil = {path = "../ioutil"} -logutil = {path = "../logutil"} -protogen = {path = "../protogen"} +ioutil = { path = "../ioutil" } +logutil = { path = "../logutil" } +protogen = { path = "../protogen" } sqlbuiltins = { path = "../sqlbuiltins" } -object_store_util = {path = "../object_store_util"} -pgrepr = {path = "../pgrepr"} +object_store_util = { path = "../object_store_util" } +pgrepr = { path = "../pgrepr" } async-trait = { workspace = true } datafusion = { workspace = true } futures = { workspace = true } @@ -26,10 +26,6 @@ tracing = { workspace = true } uuid = { version = "1.7.0", features = ["v4", "fast-rng", "macro-diagnostics"] } bytes = "1.4" once_cell = "1.19.0" -proptest-derive = "0.4" tower = "0.4" dashmap = "5.5.0" catalog = { path = "../catalog" } - -[dev-dependencies] -proptest = "1.4" diff --git a/crates/parser/Cargo.toml b/crates/parser/Cargo.toml new file mode 100644 index 000000000..f38e17f6c --- /dev/null +++ b/crates/parser/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "parser" +version.workspace = true +edition.workspace = true + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +datafusion.workspace = true +datafusion_ext = { path = "../datafusion_ext" } +protogen = { path = "../protogen" } +prql-compiler = "0.11.3" +thiserror.workspace = true + + +[lints] +workspace = true diff --git a/crates/parser/src/errors.rs b/crates/parser/src/errors.rs new file mode 100644 index 000000000..ca1cd81b9 --- /dev/null +++ b/crates/parser/src/errors.rs @@ -0,0 +1,20 @@ +pub use datafusion::sql::sqlparser::parser::ParserError; + +#[derive(Debug, thiserror::Error)] +pub enum ParseError { + #[error("SQL statement currently unsupported: {0}")] + UnsupportedSQLStatement(String), + #[error(transparent)] + SqlParser(#[from] ParserError), + #[error("Invalid object name length: {length}, max: {max}")] + InvalidNameLength { length: usize, max: usize }, + #[error("Other error: {0}")] + Other(String), +} + +pub type Result = std::result::Result; +impl From for ParserError { + fn from(e: ParseError) -> Self { + ParserError::ParserError(e.to_string()) + } +} diff --git a/crates/sqlexec/src/parser.rs b/crates/parser/src/lib.rs similarity index 97% rename from crates/sqlexec/src/parser.rs rename to crates/parser/src/lib.rs index 558029774..df27a0e5a 100644 --- a/crates/sqlexec/src/parser.rs +++ b/crates/parser/src/lib.rs @@ -1,3 +1,4 @@ +pub mod errors; pub mod options; use std::collections::{BTreeMap, VecDeque}; @@ -12,8 +13,8 @@ use datafusion_ext::vars::Dialect; use prql_compiler::sql::Dialect as PrqlDialect; use prql_compiler::{compile, Options, Target}; -use self::options::{OptionValue, StmtOptions}; -use crate::errors::Result; +use self::options::{OptionValue, StatementOptions}; +use crate::errors::{ParseError, Result}; /// Wrapper around our custom parse for parsing a sql statement. pub fn parse_sql(sql: &str) -> Result> { @@ -42,7 +43,7 @@ pub struct CreateExternalTableStmt { /// Credentials to use for configuration. pub credentials: Option, /// Datasource specific options. - pub options: StmtOptions, + pub options: StatementOptions, } impl fmt::Display for CreateExternalTableStmt { @@ -82,7 +83,7 @@ pub struct CreateExternalDatabaseStmt { /// Credentials to use for configuration. pub credentials: Option, /// Datasource specific options. - pub options: StmtOptions, + pub options: StatementOptions, } impl fmt::Display for CreateExternalDatabaseStmt { @@ -194,7 +195,7 @@ pub struct CreateTunnelStmt { /// The tunnel type the connection is for. pub tunnel: Ident, /// Tunnel specific options. - pub options: StmtOptions, + pub options: StatementOptions, } impl fmt::Display for CreateTunnelStmt { @@ -270,7 +271,7 @@ pub struct CreateCredentialsStmt { /// The credentials provider. pub provider: Ident, /// Credentials specific options. - pub options: StmtOptions, + pub options: StatementOptions, /// Optional comment (what the credentials are for). pub comment: String, /// replace if it exists @@ -312,7 +313,7 @@ pub struct CreateCredentialStmt { /// The credentials provider. pub provider: Ident, /// Credentials specific options. - pub options: StmtOptions, + pub options: StatementOptions, /// Optional comment (what the credentials are for). pub comment: String, /// replace if it exists @@ -385,7 +386,7 @@ pub struct CopyToStmt { /// Optional credentials (for cloud storage). pub credentials: Option, /// COPY TO specific options. - pub options: StmtOptions, + pub options: StatementOptions, } impl fmt::Display for CopyToStmt { @@ -824,7 +825,7 @@ impl<'a> CustomParser<'a> { } /// Parse options block. - fn parse_options(&mut self) -> Result { + fn parse_options(&mut self) -> Result { let has_options_keyword = self.consume_token(&Token::make_keyword("OPTIONS")); let has_block_opening = self.parser.consume_token(&Token::LParen); @@ -833,7 +834,7 @@ impl<'a> CustomParser<'a> { }; if !has_options_keyword && !has_block_opening { - return Ok(StmtOptions::new(BTreeMap::new())); + return Ok(StatementOptions::new(BTreeMap::new())); }; if has_options_keyword && !has_block_opening { @@ -879,7 +880,7 @@ impl<'a> CustomParser<'a> { } } - Ok(StmtOptions::new(options)) + Ok(StatementOptions::new(options)) } fn parse_options_value(&mut self) -> Result { @@ -1084,13 +1085,26 @@ impl<'a> CustomParser<'a> { } } -pub fn validate_ident(ident: &ast::Ident) -> Result<(), ParserError> { - sqlbuiltins::validation::validate_object_name(&ident.value) - .map_err(|e| ParserError::ParserError(e.to_string())) +pub fn validate_ident(ident: &ast::Ident) -> Result<()> { + pg_validate_object_name(&ident.value) +} + +/// Validate identifiers as per [postgres identifier +/// syntax](https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS) +pub fn pg_validate_object_name(name: &str) -> Result<()> { + const POSTGRES_IDENT_MAX_LENGTH: usize = 63; + if name.len() > POSTGRES_IDENT_MAX_LENGTH { + return Err(ParseError::InvalidNameLength { + length: name.len(), + max: POSTGRES_IDENT_MAX_LENGTH, + }); + } + + Ok(()) } /// Validate object names a `Vec` -pub fn validate_object_name(name: &ast::ObjectName) -> Result<(), ParserError> { +pub fn validate_object_name(name: &ast::ObjectName) -> Result<()> { for ident in name.0.iter() { validate_ident(ident)?; } @@ -1123,7 +1137,7 @@ mod tests { datasource: Ident::new("postgres"), tunnel: None, credentials: None, - options: StmtOptions::new(options), + options: StatementOptions::new(options), }; println!("{:?}", stmt); @@ -1162,7 +1176,7 @@ mod tests { datasource: Ident::new("postgres"), tunnel: None, credentials: None, - options: StmtOptions::new(options), + options: StatementOptions::new(options), }; assert_eq!( @@ -1485,7 +1499,7 @@ mod tests { parser: Parser::new(&d).with_tokens(t), }; let opts = p.parse_options().unwrap(); - let expected_opts = StmtOptions::new(map); + let expected_opts = StatementOptions::new(map); assert_eq!(opts, expected_opts); } } diff --git a/crates/sqlexec/src/parser/options.rs b/crates/parser/src/options.rs similarity index 88% rename from crates/sqlexec/src/parser/options.rs rename to crates/parser/src/options.rs index 574adedac..00db6cc9a 100644 --- a/crates/sqlexec/src/parser/options.rs +++ b/crates/parser/src/options.rs @@ -4,8 +4,6 @@ use std::fmt; use datafusion::common::parsers::CompressionTypeVariant; use datafusion::common::FileType; use datafusion::sql::sqlparser::parser::ParserError; -use datasources::debug::DebugTableType; -use datasources::mongodb::MongoDbProtocol; use protogen::metastore::types::options::StorageOptions; /// Contains the value parsed from Options(...). @@ -37,12 +35,14 @@ pub trait ParseOptionValue { fn parse_opt(self) -> Result; } +#[macro_export] macro_rules! parser_err { ($($arg:tt)*) => { ParserError::ParserError(format!($($arg)*)) }; } +#[macro_export] macro_rules! unexpected_type_err { ($t:expr, $v:expr) => { parser_err!("Expected a {}, got: {}", $t, $v) @@ -150,29 +150,6 @@ impl ParseOptionValue for OptionValue { } } -impl ParseOptionValue for OptionValue { - fn parse_opt(self) -> Result { - let opt = match self { - Self::QuotedLiteral(s) | Self::UnquotedLiteral(s) => { - s.parse().map_err(|e| parser_err!("{e}"))? - } - o => return Err(unexpected_type_err!("mongodb protocol", o)), - }; - Ok(opt) - } -} - -impl ParseOptionValue for OptionValue { - fn parse_opt(self) -> Result { - let opt = match self { - Self::QuotedLiteral(s) | Self::UnquotedLiteral(s) => { - s.parse().map_err(|e| parser_err!("{e}"))? - } - o => return Err(unexpected_type_err!("debug table type", o)), - }; - Ok(opt) - } -} impl ParseOptionValue for OptionValue { fn parse_opt(self) -> Result { @@ -199,11 +176,11 @@ impl ParseOptionValue for OptionValue { } #[derive(Debug, Clone, PartialEq, Eq)] -pub struct StmtOptions { +pub struct StatementOptions { m: BTreeMap, } -impl fmt::Display for StmtOptions { +impl fmt::Display for StatementOptions { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "OPTIONS (")?; let mut sep = ""; @@ -215,10 +192,10 @@ impl fmt::Display for StmtOptions { } } -impl TryFrom<&mut StmtOptions> for StorageOptions { +impl TryFrom<&mut StatementOptions> for StorageOptions { type Error = ParserError; - fn try_from(value: &mut StmtOptions) -> Result { + fn try_from(value: &mut StatementOptions) -> Result { let mut inner = BTreeMap::new(); for (key, value) in value.m.iter() { inner.insert(key.clone(), value.clone().parse_opt()?); @@ -227,7 +204,7 @@ impl TryFrom<&mut StmtOptions> for StorageOptions { } } -impl StmtOptions { +impl StatementOptions { pub fn new(m: BTreeMap) -> Self { Self { m } } diff --git a/crates/pgsrv/Cargo.toml b/crates/pgsrv/Cargo.toml index 4e559e585..0c7f62dc7 100644 --- a/crates/pgsrv/Cargo.toml +++ b/crates/pgsrv/Cargo.toml @@ -18,6 +18,7 @@ serde = { workspace = true } sqlexec = {path = "../sqlexec"} proxyutil = {path = "../proxyutil"} bytesutil = {path = "../bytesutil"} +parser = { path = "../parser" } pgrepr = {path = "../pgrepr"} datafusion_ext = {path = "../datafusion_ext"} bytes = "1.4.0" diff --git a/crates/pgsrv/src/handler.rs b/crates/pgsrv/src/handler.rs index 0eab80bd9..70dda35b7 100644 --- a/crates/pgsrv/src/handler.rs +++ b/crates/pgsrv/src/handler.rs @@ -8,11 +8,11 @@ use datafusion::scalar::ScalarValue; use datafusion::variable::VarType; use datafusion_ext::vars::{Dialect, SessionVars}; use futures::StreamExt; +use parser::StatementWithExtensions; use pgrepr::format::Format; use pgrepr::scalar::Scalar; use sqlexec::context::local::{OutputFields, Portal, PreparedStatement}; use sqlexec::engine::{Engine, SessionStorageConfig}; -use sqlexec::parser::{self, StatementWithExtensions}; use sqlexec::session::{ExecutionResult, Session}; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tokio_postgres::types::Type as PgType; diff --git a/crates/protogen/Cargo.toml b/crates/protogen/Cargo.toml index 5f42469b6..dd0292f02 100644 --- a/crates/protogen/Cargo.toml +++ b/crates/protogen/Cargo.toml @@ -13,8 +13,6 @@ thiserror.workspace = true tonic = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } -proptest = "1.4" -proptest-derive = "0.4" uuid = { version = "1.7.0", features = ["v4", "fast-rng", "macro-diagnostics"] } tracing = { workspace = true } diff --git a/crates/protogen/src/metastore/types/catalog.rs b/crates/protogen/src/metastore/types/catalog.rs index fbd4c2527..b8dce099e 100644 --- a/crates/protogen/src/metastore/types/catalog.rs +++ b/crates/protogen/src/metastore/types/catalog.rs @@ -4,7 +4,6 @@ use std::str::FromStr; use datafusion::arrow::datatypes::DataType; use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; -use proptest_derive::Arbitrary; use super::options::{ CredentialsOptions, @@ -186,7 +185,7 @@ impl TryFrom for catalog::CatalogEntry { } } -#[derive(Debug, Clone, Copy, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum EntryType { Database, Schema, @@ -263,7 +262,7 @@ impl fmt::Display for EntryType { } /// Metadata associated with every entry in the catalog. -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct EntryMeta { pub entry_type: EntryType, pub id: u32, @@ -304,7 +303,7 @@ impl TryFrom for EntryMeta { } } -#[derive(Debug, Clone, Copy, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum SourceAccessMode { ReadOnly, ReadWrite, @@ -382,7 +381,7 @@ impl From for i32 { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DatabaseEntry { pub meta: EntryMeta, pub options: DatabaseOptions, @@ -414,7 +413,7 @@ impl From for catalog::DatabaseEntry { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct SchemaEntry { pub meta: EntryMeta, } @@ -484,7 +483,7 @@ impl fmt::Display for TableEntry { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct ViewEntry { pub meta: EntryMeta, pub sql: String, @@ -513,7 +512,7 @@ impl From for catalog::ViewEntry { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct TunnelEntry { pub meta: EntryMeta, pub options: TunnelOptions, @@ -539,7 +538,7 @@ impl From for catalog::TunnelEntry { } } -#[derive(Debug, Clone, Copy, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FunctionType { Aggregate, Scalar, @@ -590,7 +589,7 @@ impl From for catalog::FunctionType { } /// The runtime preference for a function. -#[derive(Debug, Clone, Copy, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum RuntimePreference { Unspecified, Local, @@ -809,7 +808,7 @@ impl From for catalog::FunctionEntry { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CredentialsEntry { pub meta: EntryMeta, pub options: CredentialsOptions, @@ -840,28 +839,9 @@ impl From for catalog::CredentialsEntry { #[cfg(test)] mod tests { - use proptest::arbitrary::any; - use proptest::proptest; use super::*; - proptest! { - #[test] - fn roundtrip_entry_type(expected in any::()) { - let p: catalog::entry_meta::EntryType = expected.into(); - let got: EntryType = p.try_into().unwrap(); - assert_eq!(expected, got); - } - } - - proptest! { - #[test] - fn roundtrip_entry_meta(expected in any::()) { - let p: catalog::EntryMeta = expected.clone().into(); - let got: EntryMeta = p.try_into().unwrap(); - assert_eq!(expected, got); - } - } #[test] fn convert_catalog_state_no_deployment_metadata() { diff --git a/crates/protogen/src/metastore/types/options.rs b/crates/protogen/src/metastore/types/options.rs index b45b30c0c..eafe07bb8 100644 --- a/crates/protogen/src/metastore/types/options.rs +++ b/crates/protogen/src/metastore/types/options.rs @@ -3,18 +3,15 @@ use std::fmt; use datafusion::arrow::datatypes::{DataType, Field, Fields, SchemaRef}; use datafusion::common::DFSchemaRef; -use proptest_derive::Arbitrary; use crate::gen::common::arrow; use crate::gen::metastore::options; use crate::{FromOptionalField, ProtoConvError}; -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct InternalColumnDefinition { pub name: String, pub nullable: bool, - // TODO: change proptest strategy to select random DataType - #[proptest(value("DataType::Utf8"))] pub arrow_type: DataType, } @@ -80,7 +77,7 @@ impl TryFrom for options::InternalColumnDefinition { // Database options -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DatabaseOptions { Internal(DatabaseOptionsInternal), Debug(DatabaseOptionsDebug), @@ -212,7 +209,7 @@ impl From for options::DatabaseOptions { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsInternal {} impl TryFrom for DatabaseOptionsInternal { @@ -228,7 +225,7 @@ impl From for options::DatabaseOptionsInternal { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsDebug {} impl TryFrom for DatabaseOptionsDebug { @@ -244,7 +241,7 @@ impl From for options::DatabaseOptionsDebug { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsPostgres { pub connection_string: String, } @@ -266,7 +263,7 @@ impl From for options::DatabaseOptionsPostgres { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsBigQuery { pub service_account_key: String, pub project_id: String, @@ -291,7 +288,7 @@ impl From for options::DatabaseOptionsBigQuery { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsMysql { pub connection_string: String, } @@ -313,7 +310,7 @@ impl From for options::DatabaseOptionsMysql { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsMongoDb { pub connection_string: String, } @@ -335,7 +332,7 @@ impl From for options::DatabaseOptionsMongoDb { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsSqlServer { pub connection_string: String, } @@ -357,7 +354,7 @@ impl From for options::DatabaseOptionsSqlServer { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsClickhouse { pub connection_string: String, } @@ -378,7 +375,7 @@ impl From for options::DatabaseOptionsClickhouse { } } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsCassandra { pub host: String, pub username: Option, @@ -406,7 +403,7 @@ impl From for options::DatabaseOptionsCassandra { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsSqlite { pub location: String, } @@ -428,7 +425,7 @@ impl From for options::DatabaseOptionsSqlite { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsSnowflake { pub account_name: String, pub login_name: String, @@ -465,7 +462,7 @@ impl From for options::DatabaseOptionsSnowflake { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DatabaseOptionsDeltaLake { pub catalog: DeltaLakeCatalog, pub storage_options: StorageOptions, @@ -492,7 +489,7 @@ impl From for options::DatabaseOptionsDeltaLake { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum DeltaLakeCatalog { Unity(DeltaLakeUnityCatalog), } @@ -518,7 +515,7 @@ impl From for options::database_options_delta_lake::Catalog { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct DeltaLakeUnityCatalog { pub catalog_id: String, pub databricks_access_token: String, @@ -558,7 +555,7 @@ impl From for options::DeltaLakeUnityCatalog { /// - [Azure options](https://docs.rs/object_store/latest/object_store/azure/enum.AzureConfigKey.html#variants) /// - [S3 options](https://docs.rs/object_store/latest/object_store/aws/enum.AmazonS3ConfigKey.html#variants) /// - [Google options](https://docs.rs/object_store/latest/object_store/gcp/enum.GoogleConfigKey.html#variants) -#[derive(Debug, Default, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)] pub struct StorageOptions { pub inner: BTreeMap, } @@ -592,7 +589,7 @@ impl From for options::StorageOptions { // Table options -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TableOptions { Internal(TableOptionsInternal), Debug(TableOptionsDebug), @@ -748,7 +745,7 @@ impl TryFrom for options::TableOptions { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsInternal { pub columns: Vec, } @@ -811,7 +808,7 @@ impl TryFrom for options::TableOptionsInternal { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsDebug { pub table_type: String, } @@ -833,7 +830,7 @@ impl From for options::TableOptionsDebug { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsPostgres { pub connection_string: String, pub schema: String, @@ -861,7 +858,7 @@ impl From for options::TableOptionsPostgres { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsBigQuery { pub service_account_key: String, pub project_id: String, @@ -892,7 +889,7 @@ impl From for options::TableOptionsBigQuery { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsMysql { pub connection_string: String, pub schema: String, @@ -920,7 +917,7 @@ impl From for options::TableOptionsMysql { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsLocal { pub location: String, pub file_type: String, @@ -948,7 +945,7 @@ impl From for options::TableOptionsLocal { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsGcs { pub service_account_key: Option, pub bucket: String, @@ -982,7 +979,7 @@ impl From for options::TableOptionsGcs { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsS3 { pub access_key_id: Option, pub secret_access_key: Option, @@ -1021,7 +1018,7 @@ impl From for options::TableOptionsS3 { } } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsMongoDb { pub connection_string: String, pub database: String, @@ -1049,7 +1046,7 @@ impl From for options::TableOptionsMongo { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsExcel { pub location: String, pub storage_options: StorageOptions, @@ -1086,7 +1083,7 @@ impl From for options::TableOptionsExcel { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsSqlServer { pub connection_string: String, pub schema: String, @@ -1114,7 +1111,7 @@ impl From for options::TableOptionsSqlServer { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsClickhouse { pub connection_string: String, pub table: String, @@ -1142,7 +1139,7 @@ impl From for options::TableOptionsClickhouse { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsCassandra { pub host: String, pub keyspace: String, @@ -1176,7 +1173,7 @@ impl From for options::TableOptionsCassandra { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsSqlite { pub location: String, pub table: String, @@ -1201,7 +1198,7 @@ impl From for options::TableOptionsSqlite { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsSnowflake { pub account_name: String, pub login_name: String, @@ -1244,7 +1241,7 @@ impl From for options::TableOptionsSnowflake { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TableOptionsObjectStore { pub location: String, pub storage_options: StorageOptions, @@ -1278,7 +1275,7 @@ impl From for options::TableOptionsObjectStore { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TunnelOptions { Internal(TunnelOptionsInternal), Debug(TunnelOptionsDebug), @@ -1341,7 +1338,7 @@ impl From for options::TunnelOptions { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TunnelOptionsInternal {} impl TryFrom for TunnelOptionsInternal { @@ -1357,7 +1354,7 @@ impl From for options::TunnelOptionsInternal { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TunnelOptionsDebug {} impl TryFrom for TunnelOptionsDebug { @@ -1373,7 +1370,7 @@ impl From for options::TunnelOptionsDebug { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TunnelOptionsSsh { pub connection_string: String, pub ssh_key: Vec, @@ -1397,34 +1394,7 @@ impl From for options::TunnelOptionsSsh { } } } - -#[cfg(test)] -mod tests { - use proptest::arbitrary::any; - use proptest::proptest; - - use super::*; - - proptest! { - #[test] - fn roundtrip_table_options(expected in any::()) { - let p: options::TableOptions = expected.clone().try_into().unwrap(); - let got: TableOptions = p.try_into().unwrap(); - assert_eq!(expected, got); - } - } - - proptest! { - #[test] - fn roundtrip_connection_options(expected in any::()) { - let p: options::DatabaseOptions = expected.clone().into(); - let got: DatabaseOptions = p.try_into().unwrap(); - assert_eq!(expected, got); - } - } -} - -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum CredentialsOptions { Debug(CredentialsOptionsDebug), Gcp(CredentialsOptionsGcp), @@ -1499,7 +1469,7 @@ impl From for options::CredentialsOptions { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CredentialsOptionsDebug { pub table_type: String, } @@ -1521,7 +1491,7 @@ impl From for options::CredentialsOptionsDebug { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CredentialsOptionsGcp { pub service_account_key: String, } @@ -1543,7 +1513,7 @@ impl From for options::CredentialsOptionsGcp { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CredentialsOptionsAws { pub access_key_id: String, pub secret_access_key: String, @@ -1568,7 +1538,7 @@ impl From for options::CredentialsOptionsAws { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CredentialsOptionsAzure { pub account_name: String, pub access_key: String, @@ -1593,7 +1563,7 @@ impl From for options::CredentialsOptionsAzure { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CredentialsOptionsOpenAI { pub api_key: String, pub api_base: Option, diff --git a/crates/protogen/src/metastore/types/service.rs b/crates/protogen/src/metastore/types/service.rs index 8d0076021..97b93e438 100644 --- a/crates/protogen/src/metastore/types/service.rs +++ b/crates/protogen/src/metastore/types/service.rs @@ -1,5 +1,4 @@ use datafusion::logical_expr::Signature; -use proptest_derive::Arbitrary; use super::catalog::{FunctionType, SourceAccessMode}; use super::options::{ @@ -126,7 +125,7 @@ impl TryFrom for service::Mutation { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DropDatabase { pub name: String, pub if_exists: bool, @@ -152,7 +151,7 @@ impl From for service::DropDatabase { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DropSchema { pub name: String, pub if_exists: bool, @@ -181,7 +180,7 @@ impl From for service::DropSchema { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DropObject { pub schema: String, pub name: String, @@ -210,7 +209,7 @@ impl From for service::DropObject { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateSchema { pub name: String, pub if_not_exists: bool, @@ -235,7 +234,7 @@ impl From for service::CreateSchema { } } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateView { pub schema: String, pub name: String, @@ -270,7 +269,7 @@ impl From for service::CreateView { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateTable { pub schema: String, pub name: String, @@ -342,7 +341,7 @@ impl TryFrom for service::CreateFunction { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateExternalTable { pub schema: String, pub name: String, @@ -381,7 +380,7 @@ impl TryFrom for service::CreateExternalTable { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateExternalDatabase { pub name: String, pub options: DatabaseOptions, @@ -412,7 +411,7 @@ impl From for service::CreateExternalDatabase { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum AlterTableOperation { RenameTable { new_name: String }, SetAccessMode { access_mode: SourceAccessMode }, @@ -468,7 +467,7 @@ impl From for service::AlterTableOperation { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct AlterTable { pub schema: String, pub name: String, @@ -496,7 +495,7 @@ impl From for service::AlterTable { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum AlterDatabaseOperation { RenameDatabase { new_name: String }, SetAccessMode { access_mode: SourceAccessMode }, @@ -552,7 +551,7 @@ impl From for service::AlterDatabaseOperation { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct AlterDatabase { pub name: String, pub operation: AlterDatabaseOperation, @@ -577,7 +576,7 @@ impl From for service::AlterDatabase { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateTunnel { pub name: String, pub options: TunnelOptions, @@ -605,7 +604,7 @@ impl From for service::CreateTunnel { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DropTunnel { pub name: String, pub if_exists: bool, @@ -630,7 +629,7 @@ impl From for service::DropTunnel { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct AlterTunnelRotateKeys { pub name: String, pub if_exists: bool, @@ -658,7 +657,7 @@ impl From for service::AlterTunnelRotateKeys { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct CreateCredentials { pub name: String, pub options: CredentialsOptions, @@ -689,7 +688,7 @@ impl From for service::CreateCredentials { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct DropCredentials { pub name: String, pub if_exists: bool, @@ -714,7 +713,7 @@ impl From for service::DropCredentials { } } -#[derive(Debug, Clone, Arbitrary, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct UpdateDeploymentStorage { pub new_storage_size: u64, } diff --git a/crates/sqlexec/Cargo.toml b/crates/sqlexec/Cargo.toml index a4c95db6a..64a7d394a 100644 --- a/crates/sqlexec/Cargo.toml +++ b/crates/sqlexec/Cargo.toml @@ -19,6 +19,7 @@ sqlbuiltins = { path = "../sqlbuiltins" } datasources = { path = "../datasources" } datafusion_ext = { path = "../datafusion_ext" } object_store_util = { path = "../object_store_util" } +parser = { path = "../parser" } distexec = { path = "../distexec" } dashmap = "5.5.0" metastore = { path = "../metastore" } @@ -40,7 +41,6 @@ url.workspace = true parking_lot = "0.12.1" serde = { workspace = true } reqwest = { workspace = true } -prql-compiler = "0.11.3" num_cpus = "1.16.0" [dev-dependencies] diff --git a/crates/sqlexec/src/context/local.rs b/crates/sqlexec/src/context/local.rs index a4cbff4ab..f6609d052 100644 --- a/crates/sqlexec/src/context/local.rs +++ b/crates/sqlexec/src/context/local.rs @@ -21,6 +21,7 @@ use datafusion_ext::session_metrics::SessionMetricsHandler; use datafusion_ext::vars::SessionVars; use datasources::native::access::NativeTableStorage; use distexec::scheduler::Scheduler; +use parser::StatementWithExtensions; use pgrepr::format::Format; use pgrepr::notice::Notice; use pgrepr::types::arrow_to_pg_type; @@ -38,7 +39,6 @@ use super::{new_datafusion_runtime_env, new_datafusion_session_config_opts}; use crate::environment::EnvironmentReader; use crate::errors::{internal, ExecError, Result}; use crate::optimizer::DdlInputOptimizationRule; -use crate::parser::StatementWithExtensions; use crate::planner::logical_plan::{ FullObjectReference, FullSchemaReference, diff --git a/crates/sqlexec/src/dispatch/mod.rs b/crates/sqlexec/src/dispatch/mod.rs index 3ac675674..c82d9978e 100644 --- a/crates/sqlexec/src/dispatch/mod.rs +++ b/crates/sqlexec/src/dispatch/mod.rs @@ -12,13 +12,13 @@ use datafusion::logical_expr::{LogicalPlan, LogicalPlanBuilder}; use datafusion::prelude::{Column, Expr, SessionContext as DfSessionContext}; use datafusion_ext::functions::{DefaultTableContextProvider, FuncParamValue}; use datasources::native::access::NativeTableStorage; +use parser::CustomParser; use protogen::metastore::types::catalog::{DatabaseEntry, FunctionEntry, TableEntry, ViewEntry}; use sqlbuiltins::functions::FunctionRegistry; use self::external::ExternalDispatcher; use crate::context::local::LocalSessionContext; use crate::dispatch::system::SystemTableDispatcher; -use crate::parser::CustomParser; use crate::planner::errors::PlanError; use crate::planner::session_planner::SessionPlanner; diff --git a/crates/sqlexec/src/errors.rs b/crates/sqlexec/src/errors.rs index acac8c20c..b815e1f51 100644 --- a/crates/sqlexec/src/errors.rs +++ b/crates/sqlexec/src/errors.rs @@ -64,7 +64,7 @@ pub enum ExecError { DataFusion(#[from] datafusion::common::DataFusionError), #[error(transparent)] - ParseError(#[from] datafusion::sql::sqlparser::parser::ParserError), + ParseError(#[from] parser::errors::ParseError), #[error(transparent)] Arrow(#[from] datafusion::arrow::error::ArrowError), diff --git a/crates/sqlexec/src/lib.rs b/crates/sqlexec/src/lib.rs index 405c492d6..f83cd0e8e 100644 --- a/crates/sqlexec/src/lib.rs +++ b/crates/sqlexec/src/lib.rs @@ -5,7 +5,6 @@ pub mod environment; pub mod errors; pub mod extension_codec; mod optimizer; -pub mod parser; pub mod remote; pub mod session; diff --git a/crates/sqlexec/src/planner/errors.rs b/crates/sqlexec/src/planner/errors.rs index 6b4195270..3a2484791 100644 --- a/crates/sqlexec/src/planner/errors.rs +++ b/crates/sqlexec/src/planner/errors.rs @@ -17,6 +17,8 @@ pub enum PlanError { #[error(transparent)] DataFusion(#[from] datafusion::common::DataFusionError), + #[error(transparent)] + Parser(#[from] parser::errors::ParseError), #[error(transparent)] Preprocess(#[from] crate::planner::preprocess::PreprocessError), @@ -64,7 +66,7 @@ pub enum PlanError { ExternalTableWithSsh, #[error("Expected exactly on SQL statement, got: {0:?}")] - ExpectedExactlyOneStatement(Vec), + ExpectedExactlyOneStatement(Vec), #[error("Not allowed to write into the object: {0}")] ObjectNotAllowedToWriteInto(OwnedTableReference), diff --git a/crates/sqlexec/src/planner/session_planner.rs b/crates/sqlexec/src/planner/session_planner.rs index 7346aa502..d24c73f24 100644 --- a/crates/sqlexec/src/planner/session_planner.rs +++ b/crates/sqlexec/src/planner/session_planner.rs @@ -44,6 +44,27 @@ use datasources::sqlserver::SqlServerAccess; use object_store::aws::AmazonS3ConfigKey; use object_store::azure::AzureConfigKey; use object_store::gcp::GoogleConfigKey; +use parser::options::StatementOptions; +use parser::{ + self, + validate_ident, + validate_object_name, + AlterDatabaseStmt, + AlterTableStmtExtension, + AlterTunnelAction, + AlterTunnelStmt, + CopyToSource, + CopyToStmt, + CreateCredentialStmt, + CreateCredentialsStmt, + CreateExternalDatabaseStmt, + CreateExternalTableStmt, + CreateTunnelStmt, + DropCredentialsStmt, + DropDatabaseStmt, + DropTunnelStmt, + StatementWithExtensions, +}; use protogen::metastore::types::catalog::{ CatalogEntry, DatabaseEntry, @@ -121,27 +142,6 @@ use super::context_builder::PartialContextProvider; use super::extension::ExtensionNode; use super::physical_plan::remote_scan::ProviderReference; use crate::context::local::LocalSessionContext; -use crate::parser::options::StmtOptions; -use crate::parser::{ - self, - validate_ident, - validate_object_name, - AlterDatabaseStmt, - AlterTableStmtExtension, - AlterTunnelAction, - AlterTunnelStmt, - CopyToSource, - CopyToStmt, - CreateCredentialStmt, - CreateCredentialsStmt, - CreateExternalDatabaseStmt, - CreateExternalTableStmt, - CreateTunnelStmt, - DropCredentialsStmt, - DropDatabaseStmt, - DropTunnelStmt, - StatementWithExtensions, -}; use crate::planner::errors::{internal, PlanError, Result}; use crate::planner::logical_plan::{ AlterDatabase, @@ -187,7 +187,7 @@ struct PlanCredentialArgs { /// The credentials provider. provider: Ident, /// Credentials specific options. - options: StmtOptions, + options: StatementOptions, /// Optional comment (what the credentials are for). comment: String, or_replace: bool, @@ -1810,7 +1810,7 @@ impl<'a> SessionPlanner<'a> { } fn get_bucket( - m: &mut StmtOptions, + m: &mut StatementOptions, uri: &Option, bucket_key: &str, ) -> Result { @@ -2037,7 +2037,7 @@ impl<'a> SessionPlanner<'a> { /// Get the object store bucket and location. fn get_obj_store_bucket_and_location( - m: &mut StmtOptions, + m: &mut StatementOptions, ty: DatasourceUrlType, bucket_key: &str, ) -> Result<(String, String)> { @@ -2070,7 +2070,7 @@ fn get_obj_store_bucket_and_location( async fn validate_and_get_file_type_and_compression( access: Arc, path: impl AsRef, - m: &mut StmtOptions, + m: &mut StatementOptions, ) -> Result<(String, Option)> { let path = path.as_ref(); let accessor = @@ -2291,7 +2291,7 @@ fn convert_simple_data_type(sql_type: &ast::DataType) -> Result { } } -fn get_pg_conn_str(m: &mut StmtOptions) -> Result { +fn get_pg_conn_str(m: &mut StatementOptions) -> Result { let conn = match m.remove_optional("connection_string")? { Some(conn_str) => PostgresDbConnection::ConnectionString(conn_str), None => { @@ -2313,7 +2313,7 @@ fn get_pg_conn_str(m: &mut StmtOptions) -> Result { Ok(conn.connection_string()) } -fn get_mysql_conn_str(m: &mut StmtOptions) -> Result { +fn get_mysql_conn_str(m: &mut StatementOptions) -> Result { let conn = match m.remove_optional("connection_string")? { Some(conn_str) => MysqlDbConnection::ConnectionString(conn_str), None => { @@ -2335,7 +2335,7 @@ fn get_mysql_conn_str(m: &mut StmtOptions) -> Result { Ok(conn.connection_string()) } -fn get_mongodb_conn_str(m: &mut StmtOptions) -> Result { +fn get_mongodb_conn_str(m: &mut StatementOptions) -> Result { let conn = match m.remove_optional("connection_string")? { Some(conn_str) => MongoDbConnection::ConnectionString(conn_str), None => { @@ -2357,7 +2357,7 @@ fn get_mongodb_conn_str(m: &mut StmtOptions) -> Result { Ok(conn.connection_string()) } -fn get_ssh_conn_str(m: &mut StmtOptions) -> Result { +fn get_ssh_conn_str(m: &mut StatementOptions) -> Result { let conn = match m.remove_optional("connection_string")? { Some(conn_str) => SshConnection::ConnectionString(conn_str), None => { diff --git a/crates/sqlexec/src/session.rs b/crates/sqlexec/src/session.rs index 51533b282..2550605f4 100644 --- a/crates/sqlexec/src/session.rs +++ b/crates/sqlexec/src/session.rs @@ -33,6 +33,7 @@ use distexec::scheduler::{OutputSink, Scheduler}; use distexec::stream::create_coalescing_adapter; use futures::{Stream, StreamExt}; use once_cell::sync::Lazy; +use parser::StatementWithExtensions; use pgrepr::format::Format; use pgrepr::notice::{Notice, NoticeSeverity, SqlState}; use sqlbuiltins::functions::BuiltinScalarUDF; @@ -42,7 +43,6 @@ use uuid::Uuid; use crate::context::local::{LocalSessionContext, Portal, PreparedStatement}; use crate::environment::EnvironmentReader; use crate::errors::{ExecError, Result}; -use crate::parser::StatementWithExtensions; use crate::planner::logical_plan::{LogicalPlan, OperationInfo, TransactionPlan}; use crate::planner::physical_plan::{ get_count_from_batch, @@ -125,7 +125,7 @@ pub struct PrepareStatementArg { impl<'a> TryFrom<&'a str> for PrepareStatementArg { type Error = ExecError; fn try_from(query: &'a str) -> Result { - let mut statements = crate::parser::parse_sql(query)?; + let mut statements = parser::parse_sql(query)?; match statements.len() { 0 => Err(ExecError::String("No statements in query".to_string())), 1 => Ok(PrepareStatementArg { @@ -732,7 +732,7 @@ impl Session { /// Errors if no statements or more than one statement is provided /// in the query. pub async fn prql_to_lp(&mut self, query: &str) -> Result { - let stmt = crate::parser::parse_prql(query)?; + let stmt = parser::parse_prql(query)?; self.prepare_statements(stmt).await } @@ -784,10 +784,10 @@ impl Session { } pub fn parse_query(&self, query: &str) -> Result> { - match self.get_session_vars().dialect() { - datafusion_ext::vars::Dialect::Sql => crate::parser::parse_sql(query), - datafusion_ext::vars::Dialect::Prql => crate::parser::parse_prql(query), - } + Ok(match self.get_session_vars().dialect() { + datafusion_ext::vars::Dialect::Sql => parser::parse_sql(query)?, + datafusion_ext::vars::Dialect::Prql => parser::parse_prql(query)?, + }) } /// Execute a SQL query.