Skip to content

Commit

Permalink
Merge pull request #293 from Qrlew/fix_and_new_translators
Browse files Browse the repository at this point in the history
Add new translators and fix existing ones
  • Loading branch information
ngrislain authored Oct 1, 2024
2 parents 9f97f45 + 1f6bd7b commit 99042c4
Show file tree
Hide file tree
Showing 18 changed files with 1,792 additions and 234 deletions.
23 changes: 18 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- 5432:5432

mssql:
image: mcr.microsoft.com/mssql/server:2019-latest
image: mcr.microsoft.com/mssql/server:2019-CU28-ubuntu-20.04
env:
# Set the SA password
SA_PASSWORD: "Strong@Passw0rd"
Expand All @@ -43,14 +43,27 @@ jobs:
ports:
- 1433:1433
options: >-
--health-cmd "/opt/mssql-tools/bin/sqlcmd -S localhost -U SA -P 'Strong@Passw0rd' -Q 'SELECT 1'"
--health-cmd "/opt/mssql-tools18/bin/sqlcmd -C -S localhost -U SA -P 'Strong@Passw0rd' -Q 'SELECT 1'"
--health-interval 10s
--health-timeout 5s
--health-retries 5
mysql:
image: mysql:8.0
env:
# The MySQL docker container requires these environment variables to be set
# so we can create and migrate the test database.
# See: https://hub.docker.com/_/mysql
MYSQL_DATABASE: qrlew_mysql_test
MYSQL_ROOT_PASSWORD: qrlew_test
ports:
# Opens port 3306 on service container and host
# https://docs.github.com/en/actions/using-containerized-services/about-service-containers
- 3306:3306
# Before continuing, verify the mysql container is reachable from the ubuntu host
options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
steps:
- uses: actions/checkout@v3
- name: Build
run: cargo build --features mssql,bigquery --verbose
run: cargo build --features mssql,bigquery,mysql --verbose
- name: Run tests
run: cargo test --features mssql,bigquery --verbose
run: cargo test --features mssql,bigquery,mysql --verbose
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.9.24] - 2024-09-27
### Fixed
- mssql and bigquery translator
### Added
- mysql, databricks, hive, redshift translators
- mysql io connection for testing
- tool to get tables prefix

## [0.9.23] - 2024-07-09
### Fixed
- fixing noise multiplier of the gaussian dp event which should be independent from the sensitivity.
Expand Down
7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]
authors = ["Nicolas Grislain <ng@sarus.tech>"]
name = "qrlew"
version = "0.9.23"
version = "0.9.24"
edition = "2021"
description = "Sarus Qrlew Engine"
documentation = "https://docs.rs/qrlew"
Expand Down Expand Up @@ -42,10 +42,15 @@ wiremock = { version = "0.6", optional = true }
tempfile = { version = "3.6.0", optional = true }
yup-oauth2 = { version = "9.0", optional = true }

# mysql dependencies
mysql = { version = "25.0.1", optional = true }
r2d2_mysql = { version = "25.0.0", optional = true }

[features]
# Use SQLite for tests and examples
sqlite = ["dep:rusqlite"]
mssql = ["dep:sqlx", "dep:tokio"]
mysql = ["dep:mysql", "dep:r2d2_mysql"]
bigquery = ["dep:gcp-bigquery-client", "dep:wiremock", "dep:tempfile", "dep:yup-oauth2", "dep:tokio"]
# Tests
checked_injections = []
Expand Down
53 changes: 53 additions & 0 deletions src/dialect_translation/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ impl RelationToQueryTranslator for BigQueryTranslator {
kind: ast::CastKind::Cast,
}
}
/// Wraps `expr` in a plain `CAST` whose target type is `ast::DataType::Float64`.
fn cast_as_float(&self, expr: ast::Expr) -> ast::Expr {
    let operand = Box::new(expr);
    ast::Expr::Cast {
        kind: ast::CastKind::Cast,
        expr: operand,
        data_type: ast::DataType::Float64,
        format: None,
    }
}
fn substr(&self, exprs: Vec<ast::Expr>) -> ast::Expr {
assert!(exprs.len() == 2);
function_builder("SUBSTR", exprs, false)
Expand Down Expand Up @@ -89,6 +97,51 @@ impl RelationToQueryTranslator for BigQueryTranslator {
})
.collect()
}
/// Translates `EXTRACT(epoch FROM column)` into
/// `UNIX_SECONDS(CAST(column AS TIMESTAMP))`.
fn extract_epoch(&self, expr: ast::Expr) -> ast::Expr {
    // The argument is first cast to a timestamp without precision or time zone info.
    let timestamp_type = ast::DataType::Timestamp(None, ast::TimezoneInfo::None);
    let as_timestamp = ast::Expr::Cast {
        kind: ast::CastKind::Cast,
        expr: Box::new(expr),
        data_type: timestamp_type,
        format: None,
    };
    let arguments = vec![as_timestamp];
    function_builder("UNIX_SECONDS", arguments, false)
}
/// Builds the query `left <operator> right`, optionally preceded by a
/// non-recursive `WITH` clause holding the CTEs in `with`.
///
/// The quantifier is always explicit: `ALL` is kept as is, and every other
/// incoming quantifier is emitted as `DISTINCT`.
fn set_operation(
    &self,
    with: Vec<ast::Cte>,
    operator: ast::SetOperator,
    quantifier: ast::SetQuantifier,
    left: ast::Select,
    right: ast::Select,
) -> ast::Query {
    // UNION in BigQuery must carry a quantifier, either ALL or DISTINCT.
    let set_quantifier = if matches!(quantifier, ast::SetQuantifier::All) {
        ast::SetQuantifier::All
    } else {
        ast::SetQuantifier::Distinct
    };

    // Only emit a WITH clause when there is at least one CTE.
    let with_clause = if with.is_empty() {
        None
    } else {
        Some(ast::With {
            recursive: false,
            cte_tables: with,
        })
    };

    let as_operand = |select: ast::Select| Box::new(ast::SetExpr::Select(Box::new(select)));
    let body = ast::SetExpr::SetOperation {
        op: operator,
        set_quantifier,
        left: as_operand(left),
        right: as_operand(right),
    };

    ast::Query {
        with: with_clause,
        body: Box::new(body),
        order_by: Vec::new(),
        limit: None,
        offset: None,
        fetch: None,
        locks: Vec::new(),
        limit_by: Vec::new(),
        for_clause: None,
    }
}
}

impl QueryToRelationTranslator for BigQueryTranslator {
Expand Down
61 changes: 61 additions & 0 deletions src/dialect_translation/databricks.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use super::{function_builder, QueryToRelationTranslator, RelationToQueryTranslator};
use sqlparser::{ast, dialect::DatabricksDialect};

use crate::expr::{self};

/// Stateless marker type carrying the Databricks SQL translation rules.
///
/// It implements `RelationToQueryTranslator` (relation to SQL AST) and
/// `QueryToRelationTranslator` (SQL parsing with `DatabricksDialect`).
#[derive(Clone, Copy)]
pub struct DatabricksTranslator;

impl RelationToQueryTranslator for DatabricksTranslator {
    /// Renders every component of `value` as a backtick-quoted identifier.
    fn identifier(&self, value: &expr::Identifier) -> Vec<ast::Ident> {
        value
            .iter()
            .map(|part| ast::Ident::with_quote('`', part))
            .collect()
    }

    /// Returns the expression unchanged: no wrapping function is emitted.
    fn first(&self, expr: ast::Expr) -> ast::Expr {
        expr
    }

    /// Emits `VARIANCE(expr)`.
    fn var(&self, expr: ast::Expr) -> ast::Expr {
        let arguments = vec![expr];
        function_builder("VARIANCE", arguments, false)
    }

    /// Emits `CAST(expr AS STRING)`.
    fn cast_as_text(&self, expr: ast::Expr) -> ast::Expr {
        let operand = Box::new(expr);
        ast::Expr::Cast {
            kind: ast::CastKind::Cast,
            expr: operand,
            data_type: ast::DataType::String(None),
            format: None,
        }
    }

    /// Emits the function call `FLOAT(expr)` rather than a `CAST` expression.
    fn cast_as_float(&self, expr: ast::Expr) -> ast::Expr {
        let arguments = vec![expr];
        function_builder("FLOAT", arguments, false)
    }

    /// Translates `EXTRACT(epoch FROM column)` into `UNIX_TIMESTAMP(column)`.
    fn extract_epoch(&self, expr: ast::Expr) -> ast::Expr {
        let arguments = vec![expr];
        function_builder("UNIX_TIMESTAMP", arguments, false)
    }

    /// Renders `value` as a numeric literal.
    ///
    /// Magnitudes below `1e-10` or above `1e10` are written in scientific
    /// notation with 37 fractional digits; everything else (including NaN,
    /// for which both comparisons are false) uses Rust's default `Display`
    /// formatting for `f64`.
    fn format_float_value(&self, value: f64) -> ast::Expr {
        const MAX_PRECISION: usize = 37;
        let magnitude = value.abs();
        let use_scientific = magnitude < 1e-10 || magnitude > 1e10;
        let literal = if use_scientific {
            format!("{:.precision$e}", value, precision = MAX_PRECISION)
        } else {
            value.to_string()
        };
        ast::Expr::Value(ast::Value::Number(literal, false))
    }
}

// Associates the Databricks translator with sqlparser's `DatabricksDialect`.
impl QueryToRelationTranslator for DatabricksTranslator {
type D = DatabricksDialect;

/// Returns a new `DatabricksDialect` value.
fn dialect(&self) -> Self::D {
DatabricksDialect {}
}
}
154 changes: 154 additions & 0 deletions src/dialect_translation/hive.rs
Original file line number Diff line number Diff line change
@@ -1 +1,155 @@
use super::{function_builder, QueryToRelationTranslator, RelationToQueryTranslator};
use crate::{
expr::{self},
relation::{Join, Variant as _},
};
use sqlparser::{ast, dialect::HiveDialect};

/// Stateless marker type carrying the Hive SQL translation rules.
///
/// It implements `RelationToQueryTranslator` (relation to SQL AST) and
/// `QueryToRelationTranslator` (SQL parsing with `HiveDialect`).
#[derive(Clone, Copy)]
pub struct HiveTranslator;

// Most overrides mirror the BigQuery translator, except where Hive's built-ins
// differ: floats are cast to DOUBLE, MD5 already returns hexadecimal text, and
// the epoch is obtained with UNIX_TIMESTAMP.
// HiveTranslator is not well tested at the moment.
impl RelationToQueryTranslator for HiveTranslator {
    /// Renders every component of `value` as a backtick-quoted identifier.
    fn identifier(&self, value: &expr::Identifier) -> Vec<ast::Ident> {
        value
            .iter()
            .map(|r| ast::Ident::with_quote('`', r))
            .collect()
    }

    /// Builds a CTE named `name` around `query`.
    ///
    /// The `_columns` argument is ignored: the alias is emitted without a
    /// column list.
    fn cte(&self, name: ast::Ident, _columns: Vec<ast::Ident>, query: ast::Query) -> ast::Cte {
        ast::Cte {
            alias: ast::TableAlias {
                name,
                columns: vec![],
            },
            query: Box::new(query),
            from: None,
            materialized: None,
        }
    }

    /// Returns the expression unchanged: no wrapping function is emitted.
    fn first(&self, expr: ast::Expr) -> ast::Expr {
        expr
    }

    /// Emits `AVG(expr)`.
    fn mean(&self, expr: ast::Expr) -> ast::Expr {
        function_builder("AVG", vec![expr], false)
    }

    /// Emits `VARIANCE(expr)`.
    fn var(&self, expr: ast::Expr) -> ast::Expr {
        function_builder("VARIANCE", vec![expr], false)
    }

    /// Emits `STDDEV(expr)`.
    fn std(&self, expr: ast::Expr) -> ast::Expr {
        function_builder("STDDEV", vec![expr], false)
    }

    /// Converting LOG to LOG10
    fn log(&self, expr: ast::Expr) -> ast::Expr {
        function_builder("LOG10", vec![expr], false)
    }

    /// Emits `CAST(expr AS STRING)`.
    fn cast_as_text(&self, expr: ast::Expr) -> ast::Expr {
        ast::Expr::Cast {
            expr: Box::new(expr),
            data_type: ast::DataType::String(None),
            format: None,
            kind: ast::CastKind::Cast,
        }
    }

    /// Emits `CAST(expr AS DOUBLE)`.
    ///
    /// Hive has no `FLOAT64` type (that spelling is BigQuery's); its 8-byte
    /// floating point type is `DOUBLE`.
    fn cast_as_float(&self, expr: ast::Expr) -> ast::Expr {
        ast::Expr::Cast {
            expr: Box::new(expr),
            data_type: ast::DataType::Double,
            format: None,
            kind: ast::CastKind::Cast,
        }
    }

    /// Emits `SUBSTR(string, start)`.
    ///
    /// # Panics
    /// Panics if `exprs` does not contain exactly two expressions.
    fn substr(&self, exprs: Vec<ast::Expr>) -> ast::Expr {
        assert!(exprs.len() == 2);
        function_builder("SUBSTR", exprs, false)
    }

    /// Emits `SUBSTR(string, start, length)`.
    ///
    /// # Panics
    /// Panics if `exprs` does not contain exactly three expressions.
    fn substr_with_size(&self, exprs: Vec<ast::Expr>) -> ast::Expr {
        assert!(exprs.len() == 3);
        function_builder("SUBSTR", exprs, false)
    }

    /// Emits `MD5(expr)`.
    ///
    /// Hive's `MD5` already returns the digest as a string of 32 hex digits,
    /// and Hive has no `TO_HEX` function, so no wrapper is applied.
    fn md5(&self, expr: ast::Expr) -> ast::Expr {
        function_builder("MD5", vec![expr], false)
    }

    /// Emits `RAND()`.
    fn random(&self) -> ast::Expr {
        function_builder("RAND", vec![], false)
    }

    /// Projects every field of the left input, then every field of the right
    /// input, each qualified by the join side name and aliased with the name
    /// of the corresponding field in the join's output schema.
    fn join_projection(&self, join: &Join) -> Vec<ast::SelectItem> {
        join.left()
            .schema()
            .iter()
            .map(|f| self.expr(&expr::Expr::qcol(Join::left_name(), f.name())))
            .chain(
                join.right()
                    .schema()
                    .iter()
                    .map(|f| self.expr(&expr::Expr::qcol(Join::right_name(), f.name()))),
            )
            .zip(join.schema().iter())
            .map(|(expr, field)| ast::SelectItem::ExprWithAlias {
                expr,
                alias: field.name().into(),
            })
            .collect()
    }

    /// It converts EXTRACT(epoch FROM column) into
    /// UNIX_TIMESTAMP(CAST(col AS TIMESTAMP))
    ///
    /// `UNIX_SECONDS` is a BigQuery function; Hive exposes the conversion to
    /// seconds since the epoch as `UNIX_TIMESTAMP`.
    fn extract_epoch(&self, expr: ast::Expr) -> ast::Expr {
        let cast = ast::Expr::Cast {
            expr: Box::new(expr),
            data_type: ast::DataType::Timestamp(None, ast::TimezoneInfo::None),
            format: None,
            kind: ast::CastKind::Cast,
        };
        function_builder("UNIX_TIMESTAMP", vec![cast], false)
    }

    /// Builds the query `left <operator> right`, optionally preceded by a
    /// non-recursive `WITH` clause holding the CTEs in `with`.
    fn set_operation(
        &self,
        with: Vec<ast::Cte>,
        operator: ast::SetOperator,
        quantifier: ast::SetQuantifier,
        left: ast::Select,
        right: ast::Select,
    ) -> ast::Query {
        // Always emit an explicit quantifier: ALL is kept, and every other
        // incoming quantifier is written as DISTINCT.
        let translated_quantifier = match quantifier {
            ast::SetQuantifier::All => ast::SetQuantifier::All,
            _ => ast::SetQuantifier::Distinct,
        };
        ast::Query {
            // Only emit a WITH clause when there is at least one CTE.
            with: (!with.is_empty()).then_some(ast::With {
                recursive: false,
                cte_tables: with,
            }),
            body: Box::new(ast::SetExpr::SetOperation {
                op: operator,
                set_quantifier: translated_quantifier,
                left: Box::new(ast::SetExpr::Select(Box::new(left))),
                right: Box::new(ast::SetExpr::Select(Box::new(right))),
            }),
            order_by: vec![],
            limit: None,
            offset: None,
            fetch: None,
            locks: vec![],
            limit_by: vec![],
            for_clause: None,
        }
    }
}

// Associates the Hive translator with sqlparser's `HiveDialect`.
impl QueryToRelationTranslator for HiveTranslator {
type D = HiveDialect;

/// Returns a new `HiveDialect` value.
fn dialect(&self) -> Self::D {
HiveDialect {}
}
}
Loading

0 comments on commit 99042c4

Please sign in to comment.