Skip to content

Commit

Permalink
Merge pull request #284 from KantorSerhiy/databricks_dialect
Browse files Browse the repository at this point in the history
Databricks dialect
  • Loading branch information
EvgSkv authored Oct 3, 2023
2 parents 872b27a + 68a643d commit e2df0ce
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 6 deletions.
42 changes: 42 additions & 0 deletions compiler/dialect_libraries/databricks_library.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/python
#
# Copyright 2023 Logica Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Logica-language library program for the Databricks dialect. Each rule wraps
# a Databricks SQL snippet via SqlExpr.
#
# ArgMin/ArgMax take the first element of the ordered aggregate with `[0]`:
# the Databricks bracket operator on arrays is 0-based, so `[1]` would return
# the second element (or fail on single-element groups). SLICE, in contrast,
# is 1-based, so ArgMinK/ArgMaxK keep a start position of 1.
library = """
->(left:, right:) = {arg: left, value: right};
ArgMin(a) = SqlExpr(
"(ARRAY_AGG({arg} order by {value}))[0]",
{arg: a.arg, value: a.value});
ArgMax(a) = SqlExpr(
"(ARRAY_AGG({arg} order by {value} desc))[0]",
{arg: a.arg, value: a.value});
ArgMaxK(a, l) = SqlExpr(
"SLICE(ARRAY_AGG({arg} order by {value} desc), 1, {lim})",
{arg: a.arg, value: a.value, lim: l});
ArgMinK(a, l) = SqlExpr(
"SLICE(ARRAY_AGG({arg} order by {value}), 1, {lim})",
{arg: a.arg, value: a.value, lim: l});
RMatch(s, p) = SqlExpr(
"REGEXP_LIKE({s}, {p})",
{s: s, p: p});
RExtract(s, p, g) = SqlExpr(
"REGEXP_SUBSTR({s}, {p}, 1, 1, 'c', {g})",
{s: s, p: p, g: g});
Array(a) = SqlExpr(
"ARRAY_AGG({value} order by {arg})",
{arg: a.arg, value: a.value});
"""
66 changes: 60 additions & 6 deletions compiler/dialects.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,14 @@
from compiler.dialect_libraries import sqlite_library
from compiler.dialect_libraries import trino_library
from compiler.dialect_libraries import presto_library
from compiler.dialect_libraries import databricks_library
else:
from ..compiler.dialect_libraries import bq_library
from ..compiler.dialect_libraries import psql_library
from ..compiler.dialect_libraries import sqlite_library
from ..compiler.dialect_libraries import trino_library
from ..compiler.dialect_libraries import presto_library
from ..compiler.dialect_libraries import databricks_library

def Get(engine):
  """Instantiates the dialect registered under the given engine name.

  Args:
    engine: key into the module-level DIALECTS registry.

  Returns:
    A new instance of the class stored in DIALECTS[engine].

  Raises:
    KeyError: if the engine name is not present in DIALECTS.
  """
  dialect_class = DIALECTS[engine]
  return dialect_class()
Expand Down Expand Up @@ -62,7 +64,7 @@ def InfixOperators(self):

def Subscript(self, record, subscript, record_is_table):
  """Formats access to a field of a record as `record.subscript`.

  Args:
    record: value placed before the dot.
    subscript: value placed after the dot.
    record_is_table: accepted but not used by this implementation.

  Returns:
    The two values joined by a dot, each rendered with `%s`.
  """
  field_access_template = '%s.%s'
  return field_access_template % (record, subscript)

def LibraryProgram(self):
  """Returns the `library` attribute of the bq_library module."""
  program_text = bq_library.library
  return program_text

Expand Down Expand Up @@ -164,7 +166,7 @@ def InfixOperators(self):

def Subscript(self, record, subscript, record_is_table):
  """Formats access to a field of a record as `(record).subscript`.

  Args:
    record: value placed inside the parentheses.
    subscript: value placed after the dot.
    record_is_table: accepted but not used by this implementation.

  Returns:
    The parenthesized record followed by a dot and the subscript.
  """
  parenthesized_access_template = '(%s).%s'
  return parenthesized_access_template % (record, subscript)

def LibraryProgram(self):
  """Returns the `library` attribute of the psql_library module."""
  program_text = psql_library.library
  return program_text

Expand Down Expand Up @@ -207,7 +209,7 @@ def InfixOperators(self):

def Subscript(self, record, subscript, record_is_table):
  """Formats access to a field of a record as `record.subscript`.

  Args:
    record: value placed before the dot.
    subscript: value placed after the dot.
    record_is_table: accepted but not used by this implementation.

  Returns:
    The two values joined by a dot, each rendered with `%s`.
  """
  parts = (record, subscript)
  return '%s.%s' % parts

def LibraryProgram(self):
  """Returns the `library` attribute of the trino_library module."""
  program_text = trino_library.library
  return program_text

Expand All @@ -228,7 +230,7 @@ class Presto(Dialect):

def Name(self):
  """Returns the name of this dialect, the string 'Presto'."""
  dialect_name = 'Presto'
  return dialect_name

def BuiltInFunctions(self):
return {
'Range': 'SEQUENCE(0, %s - 1)',
Expand All @@ -245,7 +247,7 @@ def InfixOperators(self):

def Subscript(self, record, subscript, record_is_table):
  """Formats access to a field of a record as `record.subscript`.

  Args:
    record: value placed before the dot.
    subscript: value placed after the dot.
    record_is_table: accepted but not used by this implementation.

  Returns:
    The two values joined by a dot, each rendered with `%s`.
  """
  dotted_template = '%s.%s'
  rendered = dotted_template % (record, subscript)
  return rendered

def LibraryProgram(self):
  """Returns the `library` attribute of the presto_library module."""
  program_text = presto_library.library
  return program_text

Expand Down Expand Up @@ -330,11 +332,63 @@ def DecorateCombineRule(rule, var):
)
return rule

class Databricks(Dialect):
  """Databricks dialect.

  Supplies the SQL template strings and phrases specific to Databricks SQL:
  built-in function and infix operator templates, record subscripting,
  array/unnest phrases and the dialect's library program.
  """

  # TODO: add NOW function (a DateDiff mapping is already provided below).

  def Name(self):
    """Returns the name of this dialect, the string 'Databricks'."""
    return 'Databricks'

  def BuiltInFunctions(self):
    """Returns a dict from built-in function name to its SQL template.

    Templates use either `%s` or `{0}`, `{1}`, ... placeholders for the
    function arguments.
    """
    return {
        'ToString': 'CAST(%s AS STRING)',
        'ToInt64': 'CAST(%s AS BIGINT)',
        'ToFloat64': 'CAST(%s AS DOUBLE)',
        'AnyValue': 'ANY_VALUE(%s)',
        'ILike': '({0}::string ILIKE {1})',
        'Like': '({0}::string LIKE {1})',
        'Replace': 'REPLACE({0}::string, {1}, {2})',
        # NOTE(review): ARRAY_JOIN renders an array as a delimited string in
        # Databricks; if array concatenation is intended here, CONCAT may be
        # the right function -- confirm against the other dialects.
        'ArrayConcat': 'ARRAY_JOIN({0}, {1})',
        'JsonExtract': 'GET_JSON_OBJECT({0}, {1})',
        'JsonExtractScalar': 'GET_JSON_OBJECT({0}, {1})',
        'Length': 'ARRAY_SIZE(%s)',
        'DateDiff': 'DATEDIFF({0}, {1}, {2})',
        'IsNull': '({0} IS NULL)',
        'LogicalOr': 'BOOL_OR(%s)',
        # The aggregate is spelled BOOL_AND; 'BOOL AND' (with a space) is not
        # a function name and produced invalid SQL.
        'LogicalAnd': 'BOOL_AND(%s)'
    }

  def InfixOperators(self):
    """Returns a dict from infix operator to its two-argument SQL template."""
    return {
        '++': 'CONCAT(%s, %s)',
        # NOTE(review): Databricks array_contains takes (array, value);
        # confirm the order in which the compiler fills these placeholders.
        'in': 'ARRAY_CONTAINS(%s, %s)'
    }

  def Subscript(self, record, subscript, record_is_table=False):
    """Formats access to a field of a record as `record.subscript`.

    Args:
      record: value placed before the dot.
      subscript: value placed after the dot.
      record_is_table: accepted for parity with the other dialects, whose
        Subscript methods take this third argument; not used here. It
        defaults to False so existing two-argument calls keep working.

    Returns:
      The two values joined by a dot, each rendered with `%s`.
    """
    return '%s.%s' % (record, subscript)

  def LibraryProgram(self):
    """Returns the `library` attribute of the databricks_library module."""
    return databricks_library.library

  def UnnestPhrase(self):
    """Returns the template used to unnest an array.

    Presumably `{0}` is the array expression and `{1}` the name given to the
    exploded element -- confirm against the caller.
    """
    return 'explode({0}) AS pushkin({1})'

  def ArrayPhrase(self):
    """Returns the template for constructing an array literal."""
    return 'ARRAY(%s)'

  def GroupBySpecBy(self):
    """Returns 'index'.

    Presumably this selects how GROUP BY columns are referenced -- verify
    against the compiler code that consumes it.
    """
    return 'index'

  def DecorateCombineRule(self, rule, var):
    """Returns `rule` unchanged; `var` is not used."""
    return rule

# Registry mapping an engine name to its dialect class. Each engine is listed
# exactly once; a second 'trino' entry without a trailing comma made the
# literal a syntax error.
DIALECTS = {
    'bigquery': BigQueryDialect,
    'sqlite': SqLiteDialect,
    'psql': PostgreSQL,
    'presto': Presto,
    'trino': Trino,
    'databricks': Databricks
}

0 comments on commit e2df0ce

Please sign in to comment.