# Logica library program for the Databricks dialect.
#
# Each definition maps a Logica built-in onto a Databricks SQL expression via
# SqlExpr; `{name}` placeholders are filled from the record that follows the
# template string.
#
# ArgMin/ArgMax use the native MIN_BY/MAX_BY aggregates. The previous form,
# `(ARRAY_AGG(... order by ...))[1]`, picked the wrong element because
# Databricks bracket subscripts are 0-based.
#
# RExtract uses REGEXP_EXTRACT(str, regexp, idx); the Databricks REGEXP_SUBSTR
# only accepts (str, regexp) and has no group argument.
#
# NOTE(review): ArgMaxK, ArgMinK and Array still rely on `ORDER BY` inside
# ARRAY_AGG -- confirm that the target Databricks runtime accepts it.
library = """
->(left:, right:) = {arg: left, value: right};
ArgMin(a) = SqlExpr(
  "MIN_BY({arg}, {value})",
  {arg: a.arg, value: a.value});
ArgMax(a) = SqlExpr(
  "MAX_BY({arg}, {value})",
  {arg: a.arg, value: a.value});
ArgMaxK(a, l) = SqlExpr(
  "SLICE(ARRAY_AGG({arg} order by {value} desc), 1, {lim})",
  {arg: a.arg, value: a.value, lim: l});
ArgMinK(a, l) = SqlExpr(
  "SLICE(ARRAY_AGG({arg} order by {value}), 1, {lim})",
  {arg: a.arg, value: a.value, lim: l});
RMatch(s, p) = SqlExpr(
  "REGEXP_LIKE({s}, {p})",
  {s: s, p: p});
RExtract(s, p, g) = SqlExpr(
  "REGEXP_EXTRACT({s}, {p}, {g})",
  {s: s, p: p, g: g});

Array(a) = SqlExpr(
  "ARRAY_AGG({value} order by {arg})",
  {arg: a.arg, value: a.value});

"""
class Databricks(Dialect):
  """Databricks dialect.

  Provides the SQL templates (built-in functions, infix operators, subscript
  and unnest phrases) and the Logica library program used when compiling for
  the 'databricks' engine.
  """

  # TODO: add a NOW function.

  def Name(self):
    """Returns the display name of the dialect."""
    return 'Databricks'

  def BuiltInFunctions(self):
    """Returns a map from Logica built-in names to SQL templates.

    Templates use either a single `%s` or positional `{0}`, `{1}`, ...
    placeholders for the call arguments.
    """
    return {
        'ToString': 'CAST(%s AS STRING)',
        'ToInt64': 'CAST(%s AS BIGINT)',
        'ToFloat64': 'CAST(%s AS DOUBLE)',
        'AnyValue': 'ANY_VALUE(%s)',
        'ILike': '({0}::string ILIKE {1})',
        'Like': '({0}::string LIKE {1})',
        'Replace': 'REPLACE({0}::string, {1}, {2})',
        # CONCAT concatenates arrays; ARRAY_JOIN would instead join the
        # elements of one array into a string using a delimiter.
        'ArrayConcat': 'CONCAT({0}, {1})',
        'JsonExtract': 'GET_JSON_OBJECT({0}, {1})',
        'JsonExtractScalar': 'GET_JSON_OBJECT({0}, {1})',
        'Length': 'ARRAY_SIZE(%s)',
        'DateDiff': 'DATEDIFF({0}, {1}, {2})',
        'IsNull': '({0} IS NULL)',
        'LogicalOr': 'BOOL_OR(%s)',
        # Was 'BOOL AND(%s)' (missing underscore), which is not valid SQL.
        'LogicalAnd': 'BOOL_AND(%s)'
    }

  def InfixOperators(self):
    """Returns a map from Logica infix operators to SQL templates."""
    return {
        '++': 'CONCAT(%s, %s)',
        # NOTE(review): Databricks ARRAY_CONTAINS takes (array, value);
        # confirm the order in which the translator substitutes the operands
        # of `x in arr` into this template.
        'in': 'ARRAY_CONTAINS(%s, %s)'
    }

  def Subscript(self, record, subscript, record_is_table=False):
    """Returns the SQL for accessing field `subscript` of `record`.

    `record_is_table` is accepted for signature parity with the other
    dialects' Subscript methods; it does not affect the result here.
    """
    return '%s.%s' % (record, subscript)

  def LibraryProgram(self):
    """Returns the Logica library program for this dialect."""
    return databricks_library.library

  def UnnestPhrase(self):
    """Returns the template for unnesting array {0} into element alias {1}."""
    return 'explode({0}) AS pushkin({1})'

  def ArrayPhrase(self):
    """Returns the template for an array literal."""
    return 'ARRAY(%s)'

  def GroupBySpecBy(self):
    """Returns how GROUP BY columns are specified for this dialect."""
    return 'index'

  def DecorateCombineRule(self, rule, var):
    """Returns `rule` unchanged; no decoration is applied for this dialect."""
    return rule


# Registry of supported engines, keyed by the engine name passed to Get().
DIALECTS = {
    'bigquery': BigQueryDialect,
    'sqlite': SqLiteDialect,
    'psql': PostgreSQL,
    'presto': Presto,
    'trino': Trino,
    'databricks': Databricks
}