Skip to content

Commit

Permalink
Json UUID any
Browse files Browse the repository at this point in the history
Fixes ibis-project#1944. This does *not* add support for JSON, JSONB, and UUID
PostgreSQL types. Instead, it marks them as having `dt.Any` type,
allowing the tables to be loaded while those columns are mostly ignored.
Some lines not otherwise touched by this change were also reformatted with black.
Author: Ian Rose <ian.r.rose@gmail.com>
Author: Ivan Ogasawara <ivan.ogasawara@gmail.com>

Closes ibis-project#1962 from ian-r-rose/json-uuid-any and squashes the following commits:

6a878b0 [Ian Rose] Merge pull request #1 from Quansight/json-uuid-any
298efc2 [Ivan Ogasawara] Added JSON JSONB and UUID data types.
79bab94 [Ian Rose] Move test to postgres client test suite.
3748ef9 [Ian Rose] Add some light type testing for json, jsonb, uuid.
b514069 [Ian Rose] Allow postgres client to read tables with UUID, JSON, JSONB types.
  • Loading branch information
ian-r-rose authored and toryhaavik committed Oct 2, 2019
1 parent a57ac5f commit e8e1397
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 4 deletions.
86 changes: 84 additions & 2 deletions ibis/expr/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,23 @@ def _literal_value_hash_key(self, value):
return self, _tuplize(value.items())


class JSON(String):
    """JSON (JavaScript Object Notation) text format.

    Modeled as a subtype of ``String``: the JSON document is handled
    through its serialized text representation.
    """

    # Expression classes used for scalar/column expressions of this type.
    scalar = ir.JSONScalar
    column = ir.JSONColumn

    # No per-instance attributes; consistent with the other DataType
    # subclasses in this module (e.g. UUID, MultiPolygon).
    __slots__ = ()


class JSONB(Binary):
    """JSON (JavaScript Object Notation) data stored as a binary
    representation, which eliminates whitespace, duplicate keys,
    and key ordering.

    Modeled as a subtype of ``Binary``: values are handled as the
    encoded binary form rather than text.
    """

    # Expression classes used for scalar/column expressions of this type.
    scalar = ir.JSONBScalar
    column = ir.JSONBColumn

    # No per-instance attributes; consistent with the other DataType
    # subclasses in this module (e.g. UUID, MultiPolygon).
    __slots__ = ()


class GeoSpatial(DataType):
__slots__ = 'geotype', 'srid'

Expand Down Expand Up @@ -779,6 +796,17 @@ class MultiPolygon(GeoSpatial):
__slots__ = ()


class UUID(String):
    """A universally unique identifier (UUID) is a 128-bit number used to
    identify information in computer systems.

    Modeled as a subtype of ``String``: values are handled through their
    canonical textual form.
    """

    # Expression classes used for scalar/column expressions of this type.
    scalar = ir.UUIDScalar
    column = ir.UUIDColumn

    # No per-instance attributes beyond the base DataType.
    __slots__ = ()


# ---------------------------------------------------------------------
any = Any()
null = Null()
Expand Down Expand Up @@ -815,7 +843,11 @@ class MultiPolygon(GeoSpatial):
multilinestring = MultiLineString()
multipoint = MultiPoint()
multipolygon = MultiPolygon()

# json
json = JSON()
jsonb = JSONB()
# special string based data type
uuid = UUID()

_primitive_types = [
('any', any),
Expand Down Expand Up @@ -881,6 +913,9 @@ class Tokens:
MULTIPOINT = 28
MULTIPOLYGON = 29
SEMICOLON = 30
JSON = 31
JSONB = 32
UUID = 33

@staticmethod
def name(value):
Expand All @@ -891,7 +926,6 @@ def name(value):
(getattr(Tokens, n), n) for n in dir(Tokens) if n.isalpha() and n.isupper()
)


Token = collections.namedtuple('Token', ('type', 'value'))


Expand Down Expand Up @@ -1005,6 +1039,22 @@ def name(value):
),
)
]
+ [
# json data type
(
'(?P<{}>{})'.format(token.upper(), token),
lambda token, toktype=toktype: Token(toktype, token),
)
for token, toktype in zip(
# note: `jsonb` should be first to avoid conflict with `json`
('jsonb', 'json'),
(Tokens.JSONB, Tokens.JSON),
)
]
+ [
# special string based data types
('(?P<UUID>uuid)', lambda token: Token(Tokens.UUID, token))
]
+ [
# integers, for decimal spec
(r'(?P<INTEGER>\d+)', lambda token: Token(Tokens.INTEGER, int(token))),
Expand Down Expand Up @@ -1209,6 +1259,12 @@ def type(self) -> DataType:
| "multipolygon" ":" geotype
| "multipolygon" ";" srid ":" geotype
json : "json"
jsonb : "jsonb"
uuid : "uuid"
"""
if self._accept(Tokens.PRIMITIVE):
assert self.tok is not None
Expand Down Expand Up @@ -1322,6 +1378,13 @@ def type(self) -> DataType:
self._expect(Tokens.RBRACKET)
return Struct(names, types)

# json data types
elif self._accept(Tokens.JSON):
return JSON()

elif self._accept(Tokens.JSONB):
return JSONB()

# geo spatial data type
elif self._accept(Tokens.GEOMETRY):
return Geometry()
Expand Down Expand Up @@ -1431,6 +1494,10 @@ def type(self) -> DataType:

return MultiPolygon(geotype=geotype, srid=srid)

# special string based data types
elif self._accept(Tokens.UUID):
return UUID()

else:
raise SyntaxError('Type cannot be parsed: {}'.format(self.text))

Expand Down Expand Up @@ -1763,6 +1830,16 @@ def can_cast_variadic(
return castable(source.value_type, target.value_type)


@castable.register(JSON, JSON)
def can_cast_json(source, target, **kwargs):
    """A JSON type is always castable to another JSON type."""
    return True


@castable.register(JSONB, JSONB)
def can_cast_jsonb(source, target, **kwargs):
    """A JSONB type is always castable to another JSONB type."""
    return True


# geo spatial data type
# cast between same type, used to cast from/to geometry and geography
GEO_TYPES = (
Expand All @@ -1782,6 +1859,11 @@ def can_cast_geospatial(source, target, **kwargs):
return True


@castable.register(UUID, UUID)
def can_cast_special_string(source, target, **kwargs):
    """A UUID type is always castable to another UUID type.

    Despite the generic name, this registration currently covers only
    UUID-to-UUID casts.
    """
    return True


# @castable.register(Map, Map)
# def can_cast_maps(source, target):
# return (source.equals(target) or
Expand Down
36 changes: 36 additions & 0 deletions ibis/expr/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,30 @@ class MapColumn(AnyColumn, MapValue):
pass # noqa: E701,E302


class JSONValue(StringValue):
    """Value expression for the JSON datatype; inherits string behavior."""

    pass  # noqa: E701,E302


class JSONScalar(StringScalar, JSONValue):
    """Scalar expression for the JSON datatype."""

    pass  # noqa: E701,E302


class JSONColumn(StringColumn, JSONValue):
    """Column expression for the JSON datatype."""

    pass  # noqa: E701,E302


class JSONBValue(BinaryValue):
    """Value expression for the JSONB datatype; inherits binary behavior."""

    pass  # noqa: E701,E302


class JSONBScalar(BinaryScalar, JSONBValue):
    """Scalar expression for the JSONB datatype."""

    pass  # noqa: E701,E302


class JSONBColumn(BinaryColumn, JSONBValue):
    """Column expression for the JSONB datatype."""

    pass  # noqa: E701,E302


class StructValue(AnyValue):
def __dir__(self):
return sorted(
Expand Down Expand Up @@ -909,6 +933,18 @@ class MultiPolygonColumn(GeoSpatialColumn, MultiPolygonValue): # noqa: E302
pass # noqa: E701


class UUIDValue(StringValue):
    """Value expression for the UUID datatype; inherits string behavior."""

    pass  # noqa: E701,E302


class UUIDScalar(StringScalar, UUIDValue):
    """Scalar expression for the UUID datatype."""

    pass  # noqa: E701,E302


class UUIDColumn(StringColumn, UUIDValue):
    """Column expression for the UUID datatype."""

    pass  # noqa: E701,E302


class ListExpr(ColumnExpr, AnyValue):
@property
def values(self):
Expand Down
15 changes: 15 additions & 0 deletions ibis/sql/alchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,21 @@ def sa_double(_, satype, nullable=True):
return dt.Double(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.UUID)
def sa_uuid(_, satype, nullable=True):
    """Map the SQLAlchemy PostgreSQL UUID type to the ibis UUID dtype."""
    return dt.UUID(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSON)
def sa_json(_, satype, nullable=True):
    """Map the SQLAlchemy PostgreSQL JSON type to the ibis JSON dtype."""
    return dt.JSON(nullable=nullable)


@dt.dtype.register(PostgreSQLDialect, sa.dialects.postgresql.JSONB)
def sa_jsonb(_, satype, nullable=True):
    """Map the SQLAlchemy PostgreSQL JSONB type to the ibis JSONB dtype."""
    return dt.JSONB(nullable=nullable)


if geospatial_supported:

@dt.dtype.register(SQLAlchemyDialect, (ga.Geometry, ga.types._GISType))
Expand Down
30 changes: 29 additions & 1 deletion ibis/sql/postgres/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
import ibis
import ibis.expr.datatypes as dt
import ibis.expr.types as ir
import ibis.sql.alchemy as alch # noqa: E402
from ibis.tests.util import assert_equal

pytest.importorskip('sqlalchemy')
sa = pytest.importorskip('sqlalchemy')
pytest.importorskip('psycopg2')

pytestmark = pytest.mark.postgresql
Expand Down Expand Up @@ -136,6 +137,33 @@ def test_schema_table():
assert isinstance(schema['tables'], ir.TableExpr)


def test_schema_type_conversion():
    """PostgreSQL JSON/JSONB/UUID columns map to the matching ibis dtypes.

    No live database is required: the table is built in-memory from
    SQLAlchemy metadata and its schema reflected via
    ``alch.schema_from_table``.
    """
    # (column name, SQLAlchemy type, nullable, expected ibis dtype)
    typespec = [
        ('json', sa.dialects.postgresql.JSON, True, dt.JSON),
        ('jsonb', sa.dialects.postgresql.JSONB, True, dt.JSONB),
        ('uuid', sa.dialects.postgresql.UUID, True, dt.UUID),
    ]

    sqla_columns = []
    ibis_pairs = []
    for name, satype, nullable, ibis_type in typespec:
        sqla_columns.append(sa.Column(name, satype, nullable=nullable))
        ibis_pairs.append((name, ibis_type(nullable=nullable)))

    # Build a table containing JSON, JSONB, and UUID columns.
    engine = sa.create_engine('postgresql://')
    table = sa.Table('tname', sa.MetaData(bind=engine), *sqla_columns)

    # The reflected ibis schema must use the dedicated JSON/JSONB/UUID
    # dtypes (not dt.any) for these columns.
    schema = alch.schema_from_table(table)
    expected = ibis.schema(ibis_pairs)

    assert_equal(schema, expected)


def test_interval_films_schema(con):
t = con.table("films")
assert t.len.type() == dt.Interval(unit="m")
Expand Down
31 changes: 31 additions & 0 deletions ibis/tests/all/test_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
""" Tests for json data types"""
import json

import pytest
from pytest import param

import ibis
from ibis.tests.backends import PostgreSQL

# Backends that support JSON types; add new backends here.
# NOTE(review): the name appears copy-pasted from the geospatial test
# suite — these backends support JSON, not geo types. Consider renaming.
all_db_geo_supported = [PostgreSQL]


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_geo_supported)
def test_json(backend, con, data, alltypes):
    """A JSON literal round-trips through the backend unchanged."""
    serialized = json.dumps(data)
    json_lit = ibis.literal(serialized, type='json').name('tmp')
    projection = alltypes[[alltypes.id, json_lit]].head(1)
    result = projection.execute()
    assert result['tmp'].iloc[0] == serialized


@pytest.mark.parametrize('data', [param({'status': True}, id='status')])
@pytest.mark.only_on_backends(all_db_geo_supported)
def test_jsonb(backend, con, data, alltypes):
    """A JSONB literal (UTF-8 encoded bytes) round-trips unchanged."""
    encoded = json.dumps(data).encode('utf8')
    jsonb_lit = ibis.literal(encoded, type='jsonb').name('tmp')
    projection = alltypes[[alltypes.id, jsonb_lit]].head(1)
    result = projection.execute()
    assert result['tmp'].iloc[0] == encoded
14 changes: 13 additions & 1 deletion ibis/tests/all/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import ibis
import ibis.expr.datatypes as dt
from ibis.tests.backends import Clickhouse, Impala, PySpark, Spark
from ibis.tests.backends import Clickhouse, Impala, PostgreSQL, PySpark, Spark


def test_string_col_is_unicode(backend, alltypes, df):
Expand Down Expand Up @@ -233,3 +233,15 @@ def test_string(backend, alltypes, df, result_func, expected_func):

expected = backend.default_series_rename(expected_func(df))
backend.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    'data, data_type',
    [param('123e4567-e89b-12d3-a456-426655440000', 'uuid', id='uuid')],
)
@pytest.mark.only_on_backends([PostgreSQL])
def test_special_strings(backend, con, alltypes, data, data_type):
    """A special string literal (e.g. UUID) round-trips unchanged."""
    typed_lit = ibis.literal(data, type=data_type).name('tmp')
    projection = alltypes[[alltypes.id, typed_lit]].head(1)
    result = projection.execute()
    assert result['tmp'].iloc[0] == data

0 comments on commit e8e1397

Please sign in to comment.