
Commit fe01689 (merge of 2 parents: 5fec8d4 + efaffbf)

Merged from master

xmnlab committed Jun 13, 2018

Showing 28 changed files with 708 additions and 169 deletions.
ci/requirements-dev-2.7.yml (1 addition, 0 deletions)

@@ -30,6 +30,7 @@ dependencies:
   - python=2.7
   - python-graphviz
   - python-hdfs>=2.0.16
+  - pytz
   - regex
   - requests
   - ruamel.yaml

ci/requirements-dev-3.5.yml (1 addition, 0 deletions)

@@ -25,6 +25,7 @@ dependencies:
   - python=3.5
   - python-graphviz
   - python-hdfs>=2.0.16
+  - pytz
   - regex
   - requests
   - ruamel.yaml

ci/requirements-dev-3.6.yml (1 addition, 0 deletions)

@@ -26,6 +26,7 @@ dependencies:
   - python=3.6
   - python-graphviz
   - python-hdfs>=2.0.16
+  - pytz
   - regex
   - requests
   - ruamel.yaml

ci/requirements-docs-3.6.yml (1 addition, 0 deletions)

@@ -30,6 +30,7 @@ dependencies:
   - python=3.6
   - python-graphviz
   - python-hdfs>=2.0.16
+  - pytz
   - regex
   - six
   - sphinx_rtd_theme<0.3

ibis/bigquery/client.py (4 additions, 3 deletions)

@@ -265,10 +265,11 @@ def rename_partitioned_column(table_expr, bq_table):
     # User configured partition column name default
     col = ibis.options.bigquery.partition_col

-    # No renaming if the config option is set to None
-    if col is None:
+    # No renaming if the config option is set to None or the partition field
+    # is not _PARTITIONTIME
+    if col is None or partition_field != NATIVE_PARTITION_COL:
         return table_expr
-    return table_expr.relabel({partition_field: col})
+    return table_expr.relabel({NATIVE_PARTITION_COL: col})


 def parse_project_and_dataset(project, dataset):

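For reference, a minimal sketch of the renaming behavior this change pins down. The project, dataset, and table names below are hypothetical; the column checks mirror the tests added to ibis/bigquery/tests/test_client.py in this commit.

import ibis

# Hypothetical project, dataset, and table names.
client = ibis.bigquery.connect(project_id='my-project', dataset_id='testing')

# A table partitioned on the native _PARTITIONTIME pseudo-column gets that
# pseudo-column relabeled to the configured name ('PARTITIONTIME' by default);
# tables partitioned on one of their own columns are now left untouched.
t = client.table('my_day_partitioned_table')
assert 'PARTITIONTIME' in t.columns
assert '_PARTITIONTIME' in t.op().table.columns

# Setting the option to None disables the renaming entirely.
with ibis.config.option_context('bigquery.partition_col', None):
    raw = client.table('my_day_partitioned_table')
    assert '_PARTITIONTIME' in raw.columns
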
ibis/bigquery/compiler.py (28 additions, 4 deletions)

@@ -1,9 +1,13 @@
+import datetime
+
 from functools import partial

 import regex as re

 import six

+import toolz
+
 from multipledispatch import Dispatcher

 import ibis
@@ -67,10 +71,15 @@ class BigQueryQueryBuilder(comp.QueryBuilder):
     union_class = BigQueryUnion

     def generate_setup_queries(self):
-        result = list(
-            map(partial(BigQueryUDFDefinition, context=self.context),
-                lin.traverse(find_bigquery_udf, self.expr)))
-        return result
+        queries = map(
+            partial(BigQueryUDFDefinition, context=self.context),
+            lin.traverse(find_bigquery_udf, self.expr)
+        )
+
+        # UDFs are uniquely identified by the name of the Node subclass we
+        # generate.
+        return list(
+            toolz.unique(queries, key=lambda x: type(x.expr.op()).__name__))


 def build_ast(expr, context):
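The deduplication added here keys uniqueness on the name of the generated UDF node class. A self-contained sketch of that idea, with made-up classes standing in for the BigQueryUDFDefinition-wrapped nodes:

import toolz


class MyUDFNode0(object):      # stand-in for a generated UDF node class
    pass


class MyUDFNode1(object):
    pass


# Repeated uses of the same UDF produce repeated setup definitions ...
definitions = [MyUDFNode0(), MyUDFNode0(), MyUDFNode1()]

# ... but keying uniqueness on the node class name keeps only one per UDF.
unique = list(toolz.unique(definitions, key=lambda node: type(node).__name__))
assert [type(node).__name__ for node in unique] == ['MyUDFNode0', 'MyUDFNode1']
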
@@ -256,6 +265,21 @@ def _literal(translator, expr):
         if not np.isfinite(value):
             return 'CAST({!r} AS FLOAT64)'.format(str(value))

+    # special case literal timestamp, date, and time scalars
+    if isinstance(expr.op(), ops.Literal):
+        value = expr.op().value
+        if isinstance(expr, ir.DateScalar):
+            if isinstance(value, datetime.datetime):
+                raw_value = value.date()
+            else:
+                raw_value = value
+            return "DATE '{}'".format(raw_value)
+        elif isinstance(expr, ir.TimestampScalar):
+            return "TIMESTAMP '{}'".format(value)
+        elif isinstance(expr, ir.TimeScalar):
+            # TODO: define extractors on TimeValue expressions
+            return "TIME '{}'".format(value)
+
     try:
         return impala_compiler._literal(translator, expr)
     except NotImplementedError:

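The effect of the new literal special cases, compiled through the same ibis.bigquery.compile entry point that the tests added below use; the commented output mirrors those tests' expected strings.

import datetime

import ibis
import ibis.expr.datatypes as dt

expr = ibis.literal(datetime.date(2017, 1, 1), type=dt.date).year()
print(ibis.bigquery.compile(expr))
# SELECT EXTRACT(year from DATE '2017-01-01') AS `tmp`

expr = ibis.literal('2017-01-01 04:55:59', type=dt.timestamp).hour()
print(ibis.bigquery.compile(expr))
# SELECT EXTRACT(hour from TIMESTAMP '2017-01-01 04:55:59') AS `tmp`

expr = ibis.literal(datetime.time(4, 55, 59), type=dt.time).hour()
print(ibis.bigquery.compile(expr))
# SELECT EXTRACT(hour from TIME '04:55:59') AS `tmp`
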
ibis/bigquery/datatypes.py (18 additions, 2 deletions)

@@ -9,12 +9,17 @@ class TypeTranslationContext(object):
     Notes
     -----
-    This is used to translate INT64 types to FLOAT64 when INT64 is used in the
-    definition of a UDF.
+    This is used to raise an exception when INT64 types are encountered to
+    avoid surprising results due to BigQuery's handling of INT64 types in
+    JavaScript UDFs.
     """
     __slots__ = ()


+class UDFContext(TypeTranslationContext):
+    __slots__ = ()
+
+
 ibis_type_to_bigquery_type = Dispatcher('ibis_type_to_bigquery_type')

@@ -66,3 +71,14 @@ def trans_timestamp(t, context):
 @ibis_type_to_bigquery_type.register(dt.DataType, TypeTranslationContext)
 def trans_type(t, context):
     return str(t).upper()
+
+
+@ibis_type_to_bigquery_type.register(dt.Integer, UDFContext)
+def trans_integer_udf(t, context):
+    # JavaScript does not have integers, only a Number class. BigQuery doesn't
+    # behave as expected with INT64 inputs or outputs
+    raise TypeError(
+        'BigQuery does not support INT64 as an argument type or a return type '
+        'for UDFs. Replace INT64 with FLOAT64 in your UDF signature and '
+        'cast all INT64 inputs to FLOAT64.'
+    )

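A minimal sketch of what the new UDFContext registration does when the dispatcher is called directly. It assumes the non-UDF translation of int64 is simply the uppercased type name, as in trans_type above.

import ibis.expr.datatypes as dt
from ibis.bigquery.datatypes import (
    TypeTranslationContext,
    UDFContext,
    ibis_type_to_bigquery_type,
)

# Outside of a UDF signature INT64 still translates normally (assumption:
# this falls through to trans_type, which uppercases the type name).
assert ibis_type_to_bigquery_type(dt.int64, TypeTranslationContext()) == 'INT64'

# Inside a UDF signature the same type is rejected, because JavaScript UDFs
# only have a double-precision Number type.
try:
    ibis_type_to_bigquery_type(dt.int64, UDFContext())
except TypeError as e:
    print(e)
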
ibis/bigquery/tests/test_client.py (32 additions, 13 deletions)

@@ -365,6 +365,11 @@ def test_scalar_param_scope(alltypes):
 FROM `ibis-gbq.testing.functional_alltypes`"""


+def test_parted_column_rename(client, parted_alltypes):
+    assert 'PARTITIONTIME' in parted_alltypes.columns
+    assert '_PARTITIONTIME' in parted_alltypes.op().table.columns
+
+
 def test_scalar_param_partition_time(parted_alltypes):
     t = parted_alltypes
     param = ibis.param('timestamp').name('time_param')

@@ -384,20 +389,10 @@ def test_exists_database(client):


 @pytest.mark.parametrize('kind', ['date', 'timestamp'])
-@pytest.mark.parametrize(
-    ('option', 'expected_fn'),
-    [
-        (None, 'my_{}_parted_col'.format),
-        ('PARTITIONTIME', lambda kind: 'PARTITIONTIME'),
-        ('foo_bar', lambda kind: 'foo_bar'),
-    ]
-)
-def test_parted_column(client, kind, option, expected_fn):
+def test_parted_column(client, kind):
     table_name = '{}_column_parted'.format(kind)
-    option_key = 'bigquery.partition_col'
-    with ibis.config.option_context(option_key, option):
-        t = client.table(table_name)
-        expected_column = expected_fn(kind)
+    t = client.table(table_name)
+    expected_column = 'my_{}_parted_col'.format(kind)
     assert t.columns == [expected_column, 'string_col', 'int_col']

@@ -549,3 +544,27 @@ def test_client_sql_query(client):
     result = expr.execute()
     expected = client.table('functional_alltypes').head(20).execute()
     tm.assert_frame_equal(result, expected)
+
+
+def test_timestamp_column_parted_is_not_renamed(client):
+    t = client.table('timestamp_column_parted')
+    assert '_PARTITIONTIME' not in t.columns
+    assert 'PARTITIONTIME' not in t.columns
+
+
+def test_prevent_rewrite(alltypes):
+    t = alltypes
+    expr = (t.groupby(t.string_col)
+            .aggregate(collected_double=t.double_col.collect())
+            .pipe(ibis.prevent_rewrite)
+            .filter(lambda t: t.string_col != 'wat'))
+    result = expr.compile()
+    expected = """\
+SELECT *
+FROM (
+  SELECT `string_col`, ARRAY_AGG(`double_col`) AS `collected_double`
+  FROM `ibis-gbq.testing.functional_alltypes`
+  GROUP BY 1
+) t0
+WHERE `string_col` != 'wat'"""
+    assert result == expected

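A short, hedged sketch of what ibis.prevent_rewrite is doing in test_prevent_rewrite above: it marks the aggregation as opaque so the later filter is not folded into it, which is why the compiled SQL keeps the aggregation in a subquery. The unbound table below is a stand-in for the functional_alltypes fixture.

import ibis

# Stand-in schema with just the columns the test touches.
t = ibis.table(
    [('string_col', 'string'), ('double_col', 'double')],
    name='functional_alltypes',
)

agg = t.groupby(t.string_col).aggregate(
    collected_double=t.double_col.collect(),
)

# Without the wrapper, ibis may rewrite the filter into the aggregation query;
# prevent_rewrite keeps the aggregation intact and the filter becomes an outer
# WHERE over a subquery, as asserted in the test above.
expr = ibis.prevent_rewrite(agg).filter(lambda t: t.string_col != 'wat')
print(ibis.bigquery.compile(expr))  # table name is unqualified here, since t is unbound
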
ibis/bigquery/tests/test_compiler.py (73 additions, 0 deletions)

@@ -1,9 +1,17 @@
import datetime

import pytest

import pandas as pd

import ibis
import ibis.expr.datatypes as dt


pytestmark = pytest.mark.bigquery
pytest.importorskip('google.cloud.bigquery')


def test_timestamp_accepts_date_literals(alltypes):
    date_string = '2009-03-01'
    param = ibis.param(dt.timestamp).name('param_0')
@@ -76,3 +84,68 @@ def test_to_timestamp(alltypes, timezone):
 SELECT PARSE_TIMESTAMP('%F', `date_string_col`) AS `tmp`
 FROM `ibis-gbq.testing.functional_alltypes`"""
     assert result == expected
+
+
+@pytest.mark.parametrize(
+    ('case', 'expected', 'dtype'),
+    [
+        (datetime.date(2017, 1, 1), "DATE '{}'".format('2017-01-01'), dt.date),
+        (
+            pd.Timestamp('2017-01-01'),
+            "DATE '{}'".format('2017-01-01'),
+            dt.date
+        ),
+        ('2017-01-01', "DATE '{}'".format('2017-01-01'), dt.date),
+        (
+            datetime.datetime(2017, 1, 1, 4, 55, 59),
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+        (
+            '2017-01-01 04:55:59',
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+        (
+            pd.Timestamp('2017-01-01 04:55:59'),
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+    ]
+)
+def test_literal_date(case, expected, dtype):
+    expr = ibis.literal(case, type=dtype).year()
+    result = ibis.bigquery.compile(expr)
+    assert result == "SELECT EXTRACT(year from {}) AS `tmp`".format(expected)
+
+
+@pytest.mark.parametrize(
+    ('case', 'expected', 'dtype'),
+    [
+        (
+            datetime.datetime(2017, 1, 1, 4, 55, 59),
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+        (
+            '2017-01-01 04:55:59',
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+        (
+            pd.Timestamp('2017-01-01 04:55:59'),
+            "TIMESTAMP '{}'".format('2017-01-01 04:55:59'),
+            dt.timestamp,
+        ),
+        (
+            datetime.time(4, 55, 59),
+            "TIME '{}'".format('04:55:59'),
+            dt.time,
+        ),
+        ('04:55:59', "TIME '{}'".format('04:55:59'), dt.time),
+    ]
+)
+def test_literal_timestamp_or_time(case, expected, dtype):
+    expr = ibis.literal(case, type=dtype).hour()
+    result = ibis.bigquery.compile(expr)
+    assert result == "SELECT EXTRACT(hour from {}) AS `tmp`".format(expected)
(The remaining changed files are not shown.)
