Skip to content

Commit

Permalink
fix(bigquery): Support FORMAT_TIMESTAMP (#4383)
Browse files Browse the repository at this point in the history
  • Loading branch information
VaggelisD authored Nov 13, 2024
1 parent 858c5b1 commit 9ec61de
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 44 deletions.
35 changes: 22 additions & 13 deletions sqlglot/dialects/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def _build_time(args: t.List) -> exp.Func:

def _build_datetime(args: t.List) -> exp.Func:
if len(args) == 1:
return exp.TsOrDsToTimestamp.from_arg_list(args)
return exp.TsOrDsToDatetime.from_arg_list(args)
if len(args) == 2:
return exp.Datetime.from_arg_list(args)
return exp.TimestampFromParts.from_arg_list(args)
Expand Down Expand Up @@ -305,6 +305,13 @@ def _build_levenshtein(args: t.List) -> exp.Levenshtein:
)


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
def _builder(args: t.List) -> exp.TimeToStr:
return exp.TimeToStr(this=expr_type(this=seq_get(args, 1)), format=seq_get(args, 0))

return _builder


class BigQuery(Dialect):
WEEK_OFFSET = -1
UNNEST_COLUMN_ONLY = True
Expand Down Expand Up @@ -463,9 +470,7 @@ class Parser(parser.Parser):
"DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
"DIV": binary_from_function(exp.IntDiv),
"EDIT_DISTANCE": _build_levenshtein,
"FORMAT_DATE": lambda args: exp.TimeToStr(
this=exp.TsOrDsToDate(this=seq_get(args, 1)), format=seq_get(args, 0)
),
"FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
"GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
"JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
"JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
Expand Down Expand Up @@ -508,9 +513,8 @@ class Parser(parser.Parser):
),
"TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
"TO_JSON_STRING": exp.JSONFormat.from_arg_list,
"FORMAT_DATETIME": lambda args: exp.TimeToStr(
this=exp.TsOrDsToTimestamp(this=seq_get(args, 1)), format=seq_get(args, 0)
),
"FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
"FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
}

FUNCTION_PARSERS = {
Expand Down Expand Up @@ -862,7 +866,8 @@ class Generator(generator.Generator):
exp.TsOrDsAdd: _ts_or_ds_add_sql,
exp.TsOrDsDiff: _ts_or_ds_diff_sql,
exp.TsOrDsToTime: rename_func("TIME"),
exp.TsOrDsToTimestamp: rename_func("DATETIME"),
exp.TsOrDsToDatetime: rename_func("DATETIME"),
exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
exp.Unhex: rename_func("FROM_HEX"),
exp.UnixDate: rename_func("UNIX_DATE"),
exp.UnixToTime: _unix_to_time_sql,
Expand Down Expand Up @@ -1051,16 +1056,20 @@ def table_parts(self, expression: exp.Table) -> str:
return super().table_parts(expression)

def timetostr_sql(self, expression: exp.TimeToStr) -> str:
if isinstance(expression.this, exp.TsOrDsToTimestamp):
this = expression.this
if isinstance(this, exp.TsOrDsToDatetime):
func_name = "FORMAT_DATETIME"
elif isinstance(this, exp.TsOrDsToTimestamp):
func_name = "FORMAT_TIMESTAMP"
else:
func_name = "FORMAT_DATE"
this = (
expression.this
if isinstance(expression.this, (exp.TsOrDsToTimestamp, exp.TsOrDsToDate))

time_expr = (
this
if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
else expression
)
return self.func(func_name, self.format_time(expression), this.this)
return self.func(func_name, self.format_time(expression), time_expr.this)

def eq_sql(self, expression: exp.EQ) -> str:
# Operands of = cannot be NULL in BigQuery
Expand Down
1 change: 1 addition & 0 deletions sqlglot/dialects/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,7 @@ class Generator(generator.Generator):
exp.DataType.Type.BINARY: "BLOB",
exp.DataType.Type.BPCHAR: "TEXT",
exp.DataType.Type.CHAR: "TEXT",
exp.DataType.Type.DATETIME: "TIMESTAMP",
exp.DataType.Type.FLOAT: "REAL",
exp.DataType.Type.NCHAR: "TEXT",
exp.DataType.Type.NVARCHAR: "TEXT",
Expand Down
11 changes: 8 additions & 3 deletions sqlglot/dialects/snowflake.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,9 +908,6 @@ class Generator(generator.Generator):
),
exp.TimestampTrunc: timestamptrunc_sql(),
exp.TimeStrToTime: timestrtotime_sql,
exp.TimeToStr: lambda self, e: self.func(
"TO_CHAR", exp.cast(e.this, exp.DataType.Type.TIMESTAMP), self.format_time(e)
),
exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})",
exp.ToArray: rename_func("TO_ARRAY"),
exp.ToChar: lambda self, e: self.function_fallback_sql(e),
Expand Down Expand Up @@ -1147,3 +1144,11 @@ def jsonextract_sql(self, expression: exp.JSONExtract):
exp.ParseJSON(this=this) if this.is_string else this,
expression.expression,
)

def timetostr_sql(self, expression: exp.TimeToStr) -> str:
this = expression.this

if this.is_string:
this = exp.cast(this, exp.DataType.Type.TIMESTAMP)

return self.func("TO_CHAR", this, self.format_time(expression))
8 changes: 6 additions & 2 deletions sqlglot/expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6495,6 +6495,10 @@ class TsOrDsToDate(Func):
arg_types = {"this": True, "format": False, "safe": False}


class TsOrDsToDatetime(Func):
pass


class TsOrDsToTime(Func):
pass

Expand Down Expand Up @@ -7798,8 +7802,8 @@ def cast(
existing_cast_type: DataType.Type = expr.to.this
new_cast_type: DataType.Type = data_type.this
types_are_equivalent = type_mapping.get(
existing_cast_type, existing_cast_type
) == type_mapping.get(new_cast_type, new_cast_type)
existing_cast_type, existing_cast_type.value
) == type_mapping.get(new_cast_type, new_cast_type.value)
if expr.is_type(data_type) or types_are_equivalent:
return expr

Expand Down
9 changes: 8 additions & 1 deletion sqlglot/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -3897,7 +3897,14 @@ def tsordstotimestamp_sql(self, expression: exp.TsOrDsToTimestamp) -> str:
if isinstance(this, exp.TsOrDsToTimestamp) or this.is_type(exp.DataType.Type.TIMESTAMP):
return self.sql(this)

return self.sql(exp.cast(this, exp.DataType.Type.TIMESTAMP))
return self.sql(exp.cast(this, exp.DataType.Type.TIMESTAMP, dialect=self.dialect))

def tsordstodatetime_sql(self, expression: exp.TsOrDsToDatetime) -> str:
this = expression.this
if isinstance(this, exp.TsOrDsToDatetime) or this.is_type(exp.DataType.Type.DATETIME):
return self.sql(this)

return self.sql(exp.cast(this, exp.DataType.Type.DATETIME, dialect=self.dialect))

def tsordstodate_sql(self, expression: exp.TsOrDsToDate) -> str:
this = expression.this
Expand Down
58 changes: 34 additions & 24 deletions tests/dialects/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,27 +557,6 @@ def test_bigquery(self):
"tsql": "SELECT CAST('2008-12-25 15:30:00' AS TIME)",
},
)
self.validate_all(
"SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
write={
"bigquery": "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')",
},
)
self.validate_all(
"SELECT COUNTIF(x)",
read={
Expand Down Expand Up @@ -685,7 +664,7 @@ def test_bigquery(self):
write={
"bigquery": "SELECT DATETIME_ADD('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
"databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
"duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) + INTERVAL '1' MILLISECOND",
"duckdb": "SELECT CAST('2023-01-01T00:00:00' AS TIMESTAMP) + INTERVAL '1' MILLISECOND",
"snowflake": "SELECT TIMESTAMPADD(MILLISECOND, '1', '2023-01-01T00:00:00')",
},
),
Expand All @@ -696,7 +675,7 @@ def test_bigquery(self):
write={
"bigquery": "SELECT DATETIME_SUB('2023-01-01T00:00:00', INTERVAL '1' MILLISECOND)",
"databricks": "SELECT TIMESTAMPADD(MILLISECOND, '1' * -1, '2023-01-01T00:00:00')",
"duckdb": "SELECT CAST('2023-01-01T00:00:00' AS DATETIME) - INTERVAL '1' MILLISECOND",
"duckdb": "SELECT CAST('2023-01-01T00:00:00' AS TIMESTAMP) - INTERVAL '1' MILLISECOND",
},
),
)
Expand All @@ -706,7 +685,7 @@ def test_bigquery(self):
write={
"bigquery": "SELECT DATETIME_TRUNC('2023-01-01T01:01:01', HOUR)",
"databricks": "SELECT DATE_TRUNC('HOUR', '2023-01-01T01:01:01')",
"duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS DATETIME))",
"duckdb": "SELECT DATE_TRUNC('HOUR', CAST('2023-01-01T01:01:01' AS TIMESTAMP))",
},
),
)
Expand Down Expand Up @@ -2221,3 +2200,34 @@ def test_regexp_extract(self):
"databricks": "REGEXP_EXTRACT_ALL('a1_a2a3_a4A5a6', '(a)[0-9]')",
},
)

def test_format_temporal(self):
self.validate_all(
"SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
write={
"bigquery": "SELECT FORMAT_DATE('%Y%m%d', '2023-12-25')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25' AS DATE), '%Y%m%d')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', DATETIME '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%Y%m%d %H:%M:%S', CAST('2023-12-25 15:30:00' AS DATETIME))",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%Y%m%d %H:%M:%S')",
},
)
self.validate_all(
"SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
write={
"bigquery": "SELECT FORMAT_DATETIME('%x', '2023-12-25 15:30:00')",
"duckdb": "SELECT STRFTIME(CAST('2023-12-25 15:30:00' AS TIMESTAMP), '%x')",
},
)
self.validate_all(
"""SELECT FORMAT_TIMESTAMP("%b-%d-%Y", TIMESTAMP "2050-12-25 15:30:55+00")""",
write={
"bigquery": "SELECT FORMAT_TIMESTAMP('%b-%d-%Y', CAST('2050-12-25 15:30:55+00' AS TIMESTAMP))",
"duckdb": "SELECT STRFTIME(CAST(CAST('2050-12-25 15:30:55+00' AS TIMESTAMPTZ) AS TIMESTAMP), '%b-%d-%Y')",
"snowflake": "SELECT TO_CHAR(CAST(CAST('2050-12-25 15:30:55+00' AS TIMESTAMPTZ) AS TIMESTAMP), 'mon-DD-yyyy')",
},
)
2 changes: 1 addition & 1 deletion tests/dialects/test_clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def test_clickhouse(self):
},
write={
"clickhouse": "SELECT CAST('2020-01-01' AS Nullable(DateTime)) + INTERVAL '500' MICROSECOND",
"duckdb": "SELECT CAST('2020-01-01' AS DATETIME) + INTERVAL '500' MICROSECOND",
"duckdb": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500' MICROSECOND",
"postgres": "SELECT CAST('2020-01-01' AS TIMESTAMP) + INTERVAL '500 MICROSECOND'",
},
)
Expand Down

0 comments on commit 9ec61de

Please sign in to comment.