From a15e89123d9d896f4242ef5eab3bc1e04c7f3f5c Mon Sep 17 00:00:00 2001 From: Sean Smith Date: Fri, 19 Jan 2024 07:30:44 -0600 Subject: [PATCH] feat: pg_catalog.version(), standard_conforming_strings session var (#2448) Adds the `pg_catalog.version()` function (which provides more info that the `server_version` session var). The format provides compatibility with sqlalchemy/great expectations. Also adds the `standard_conforming_strings` session var since great expectations was looking for this too. I included the python script I was running against to get great expectations working, and it seems to connect without issue. But I haven't tried anything more than that. cc @talagluck --- Cargo.lock | 27 +++++++++++++++ .../python/examples/great_expectations_ex.py | 7 ++++ bindings/python/requirements.txt | 2 ++ crates/datafusion_ext/src/vars/constants.rs | 12 +++++-- crates/datafusion_ext/src/vars/inner.rs | 6 ++++ crates/metastore/src/database.rs | 4 +++ crates/pgrepr/Cargo.toml | 1 + crates/pgrepr/src/compatible.rs | 31 +++++++++++++++++ crates/pgrepr/src/lib.rs | 1 + crates/sqlbuiltins/src/functions/mod.rs | 5 +++ .../sqlbuiltins/src/functions/scalars/mod.rs | 2 -- .../src/functions/scalars/postgres.rs | 34 +++++++++++++++++++ testdata/sqllogictests/functions/version.slt | 12 ++++++- testdata/sqllogictests/vars.slt | 9 +++++ 14 files changed, 148 insertions(+), 5 deletions(-) create mode 100644 bindings/python/examples/great_expectations_ex.py create mode 100644 crates/pgrepr/src/compatible.rs diff --git a/Cargo.lock b/Cargo.lock index 3656509c2..5ed642d0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1747,6 +1747,26 @@ dependencies = [ "tiny-keccak", ] +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -5272,6 +5292,7 @@ dependencies = [ "bytes", "chrono", "chrono-tz", + "const_format", "datafusion", "decimal", "dtoa", @@ -8322,6 +8343,12 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "unicode_categories" version = "0.1.1" diff --git a/bindings/python/examples/great_expectations_ex.py b/bindings/python/examples/great_expectations_ex.py new file mode 100644 index 000000000..9c1e54927 --- /dev/null +++ b/bindings/python/examples/great_expectations_ex.py @@ -0,0 +1,7 @@ +import great_expectations as gx + +context = gx.get_context() # gets a great expectations project context +ds = context.sources.add_postgres( + name="glaredb", + connection_string="postgresql://sean:sean@localhost:6543/db", +) diff --git a/bindings/python/requirements.txt b/bindings/python/requirements.txt index 537011c77..5ace88b47 100644 --- a/bindings/python/requirements.txt +++ b/bindings/python/requirements.txt @@ -10,3 +10,5 @@ pandas polars pytest pandasai +great_expectations[postgresql] +great_expectations diff --git a/crates/datafusion_ext/src/vars/constants.rs b/crates/datafusion_ext/src/vars/constants.rs index 3094c4562..0ef1d1140 100644 --- a/crates/datafusion_ext/src/vars/constants.rs +++ b/crates/datafusion_ext/src/vars/constants.rs @@ -1,11 +1,11 @@ use super::*; +use pgrepr::compatible::server_version; use pgrepr::notice::NoticeSeverity; -// TODO: Decide proper postgres version to spoof/support pub(super) const SERVER_VERSION: ServerVar = ServerVar { name: "server_version", - value: "15.1", + value: server_version(), group: "postgres", user_configurable: false, description: "Version of the server", @@ -84,6 +84,14 @@ pub(super) const CLIENT_MIN_MESSAGES: ServerVar = ServerVar { description: "Controls which messages are sent to the client, defaults NOTICE", }; +pub(super) const STANDARD_CONFORMING_STRINGS: ServerVar = ServerVar { + name: "standard_conforming_strings", + value: &true, + group: "postgres", + user_configurable: false, + description: "Treat backslashes literally in string literals", +}; + pub(super) static GLAREDB_VERSION_OWNED: Lazy = Lazy::new(|| format!("v{}", env!("CARGO_PKG_VERSION"))); pub(super) static GLAREDB_VERSION: Lazy> = Lazy::new(|| ServerVar { diff --git a/crates/datafusion_ext/src/vars/inner.rs b/crates/datafusion_ext/src/vars/inner.rs index 734672fe0..18f6eab27 100644 --- a/crates/datafusion_ext/src/vars/inner.rs +++ b/crates/datafusion_ext/src/vars/inner.rs @@ -34,6 +34,7 @@ pub struct SessionVarsInner { pub transaction_isolation: SessionVar, pub search_path: SessionVar<[String]>, pub client_min_messages: SessionVar, + pub standard_conforming_strings: SessionVar, pub enable_debug_datasources: SessionVar, pub force_catalog_refresh: SessionVar, pub glaredb_version: SessionVar, @@ -86,6 +87,8 @@ impl SessionVarsInner { Ok(&self.search_path) } else if name.eq_ignore_ascii_case(CLIENT_MIN_MESSAGES.name) { Ok(&self.client_min_messages) + } else if name.eq_ignore_ascii_case(STANDARD_CONFORMING_STRINGS.name) { + Ok(&self.standard_conforming_strings) } else if name.eq_ignore_ascii_case(ENABLE_DEBUG_DATASOURCES.name) { Ok(&self.enable_debug_datasources) } else if name.eq_ignore_ascii_case(FORCE_CATALOG_REFRESH.name) { @@ -145,6 +148,8 @@ impl SessionVarsInner { self.search_path.set_from_str(val, setter) } else if name.eq_ignore_ascii_case(CLIENT_MIN_MESSAGES.name) { self.client_min_messages.set_from_str(val, setter) + } else if name.eq_ignore_ascii_case(STANDARD_CONFORMING_STRINGS.name) { + self.standard_conforming_strings.set_from_str(val, setter) } else if name.eq_ignore_ascii_case(ENABLE_DEBUG_DATASOURCES.name) { self.enable_debug_datasources.set_from_str(val, setter) } else if name.eq_ignore_ascii_case(FORCE_CATALOG_REFRESH.name) { @@ -221,6 +226,7 @@ impl Default for SessionVarsInner { transaction_isolation: SessionVar::new(&TRANSACTION_ISOLATION), search_path: SessionVar::new(&SEARCH_PATH), client_min_messages: SessionVar::new(&CLIENT_MIN_MESSAGES), + standard_conforming_strings: SessionVar::new(&STANDARD_CONFORMING_STRINGS), enable_debug_datasources: SessionVar::new(&ENABLE_DEBUG_DATASOURCES), force_catalog_refresh: SessionVar::new(&FORCE_CATALOG_REFRESH), glaredb_version: SessionVar::new(&GLAREDB_VERSION), diff --git a/crates/metastore/src/database.rs b/crates/metastore/src/database.rs index e9fbc1246..997f42e6e 100644 --- a/crates/metastore/src/database.rs +++ b/crates/metastore/src/database.rs @@ -1406,6 +1406,10 @@ mod tests { BuiltinCatalog::new().unwrap(); } + // TODO: Currently there's a conflict between `version()` and `pg_catalog.version()`. + // + // See + #[ignore] #[test] fn builtin_catalog_no_function_name_duplicates() { // Ensures each function is a unique (schema, name) pair. diff --git a/crates/pgrepr/Cargo.toml b/crates/pgrepr/Cargo.toml index 673de8a7f..e0c309544 100644 --- a/crates/pgrepr/Cargo.toml +++ b/crates/pgrepr/Cargo.toml @@ -17,3 +17,4 @@ num-traits = "0.2.17" dtoa = "1.0.9" chrono-tz = "0.8.5" bytes = "1.4.0" +const_format = "0.2.32" diff --git a/crates/pgrepr/src/compatible.rs b/crates/pgrepr/src/compatible.rs new file mode 100644 index 000000000..3020f7dcc --- /dev/null +++ b/crates/pgrepr/src/compatible.rs @@ -0,0 +1,31 @@ +/// Variables desribing the postgres version glaredb claims to be compatible with. +use const_format::concatcp; + +pub const PG_MAJOR_VERSION: u16 = 15; +pub const PG_MINOR_VERSION: u16 = 1; + +/// A short-form version string for use in the `server_version` session +/// variable. +pub const fn server_version() -> &'static str { + concatcp!(PG_MAJOR_VERSION, ".", PG_MINOR_VERSION) +} + +/// A long-form version string for use in the `pg_catalog.version()` function. +/// +/// Example: +/// PostgreSQL 15.1 on aarch64-unknown-linux-gnu, compiled by rustc (GlareDB 0.8.1) +pub const fn server_version_with_build_info() -> &'static str { + // TODO: We should have a build_info crate with these constants. + const GLAREDB_VERSION: &str = env!("CARGO_PKG_VERSION"); + const TARGET_TRIPLE: &str = "aarch64-unknown-linux-gnu"; // Again, build_info crate. + + concatcp!( + "PostgreSQL ", + server_version(), + " on ", + TARGET_TRIPLE, + ", compiled by rustc (GlareDB ", + GLAREDB_VERSION, + ")" + ) +} diff --git a/crates/pgrepr/src/lib.rs b/crates/pgrepr/src/lib.rs index 45b725f39..79e85120c 100644 --- a/crates/pgrepr/src/lib.rs +++ b/crates/pgrepr/src/lib.rs @@ -1,3 +1,4 @@ +pub mod compatible; pub mod error; pub mod format; pub mod notice; diff --git a/crates/sqlbuiltins/src/functions/mod.rs b/crates/sqlbuiltins/src/functions/mod.rs index 0d1740fdf..684c85ca6 100644 --- a/crates/sqlbuiltins/src/functions/mod.rs +++ b/crates/sqlbuiltins/src/functions/mod.rs @@ -182,6 +182,7 @@ impl FunctionRegistry { Arc::new(PgTableIsVisible), Arc::new(PgEncodingToChar), Arc::new(PgArrayToString), + Arc::new(PgVersion), // System functions Arc::new(ConnectionId), Arc::new(Version), @@ -393,6 +394,10 @@ mod tests { .expect("'read_parquet' should have description"); } + // TODO: Currently there's a conflict between `version()` and `pg_catalog.version()`. + // + // See + #[ignore] #[test] fn func_iters_return_one_copy() { // Assert that iterators only ever return a single reference to a diff --git a/crates/sqlbuiltins/src/functions/scalars/mod.rs b/crates/sqlbuiltins/src/functions/scalars/mod.rs index 2185da03f..03db745b9 100644 --- a/crates/sqlbuiltins/src/functions/scalars/mod.rs +++ b/crates/sqlbuiltins/src/functions/scalars/mod.rs @@ -114,7 +114,6 @@ fn try_from_u64_scalar(scalar: ScalarValue) -> Result { } } -#[allow(dead_code)] // just for merging order // TODO: What? fn safe_up_cast_integer_scalar(value: i64) -> Result { if value < 0 { Err(BuiltinError::ParseError( @@ -127,7 +126,6 @@ fn safe_up_cast_integer_scalar(value: i64) -> Result { // get_nth_64_fn_arg extracts a string value (or tries to) from a // function argument; columns are always an error. -#[allow(dead_code)] // just for merging order fn get_nth_u64_fn_arg(input: &[ColumnarValue], idx: usize) -> Result { match input.get(idx) { Some(ColumnarValue::Scalar(value)) => try_from_u64_scalar(value.to_owned()), diff --git a/crates/sqlbuiltins/src/functions/scalars/postgres.rs b/crates/sqlbuiltins/src/functions/scalars/postgres.rs index 1b92fdac7..06d914e40 100644 --- a/crates/sqlbuiltins/src/functions/scalars/postgres.rs +++ b/crates/sqlbuiltins/src/functions/scalars/postgres.rs @@ -1,4 +1,5 @@ use datafusion::logical_expr::expr::ScalarFunction; +use pgrepr::compatible::server_version_with_build_info; use crate::functions::FunctionNamespace; @@ -475,3 +476,36 @@ impl BuiltinScalarUDF for PgArrayToString { FunctionNamespace::Required("pg_catalog") } } + +/// `pg_catalog.version()` implementation. +/// +/// This provides more informatation that just the 'server_version' session +/// variable, and includes things like the build triple. +/// +/// This uses a spoofed version (and so does not match the `version()` function) +/// since many postgres tools, including sqlalchemy, will check the version +/// against hard coded values. +#[derive(Clone, Copy, Debug)] +pub struct PgVersion; + +impl ConstBuiltinFunction for PgVersion { + const NAME: &'static str = "version"; + const DESCRIPTION: &'static str = "Returns the spoofed postgres version of the database"; + const EXAMPLE: &'static str = "pg_catalog.version()"; + const FUNCTION_TYPE: FunctionType = FunctionType::Scalar; + fn signature(&self) -> Option { + Some(Signature::exact(vec![], Volatility::Stable)) + } +} + +impl BuiltinScalarUDF for PgVersion { + fn as_expr(&self, _: Vec) -> Expr { + Expr::Literal(ScalarValue::Utf8(Some( + server_version_with_build_info().to_string(), + ))) + } + + fn namespace(&self) -> FunctionNamespace { + FunctionNamespace::Required("pg_catalog") + } +} diff --git a/testdata/sqllogictests/functions/version.slt b/testdata/sqllogictests/functions/version.slt index 07cc8afc9..85fbd1e08 100644 --- a/testdata/sqllogictests/functions/version.slt +++ b/testdata/sqllogictests/functions/version.slt @@ -1,4 +1,14 @@ -query B +query T select starts_with(version(), 'v') ---- t + +# There also exists a stub for pg_catalog.version(). This is hard coded to a +# postgres version maximize compatability with postgres tooling, and is +# _different_ than the `version()` function. +# +# Needed for great expections: +query T +select starts_with(pg_catalog.version(), 'PostgreSQL 15.1') +---- +t diff --git a/testdata/sqllogictests/vars.slt b/testdata/sqllogictests/vars.slt index a20e73582..b1ae41ef6 100644 --- a/testdata/sqllogictests/vars.slt +++ b/testdata/sqllogictests/vars.slt @@ -109,3 +109,12 @@ query T show client_min_messages; ---- WARNING + +# standard_conforming_strings + +# TODO: Double check how we format bools for session vars. Interestingly, +# postgres returns 'on' for this. +query T +show standard_conforming_strings; +---- +true