Skip to content

Commit d6e7140

Browse files
authored
Add support for Trino (#381)
1. Inherit from presto 2. Add travis test script 3. Add test cases
1 parent 1548ecc commit d6e7140

12 files changed

+365
-6
lines changed

.travis.yml

+5-5
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,15 @@ matrix:
66
# https://docs.python.org/devguide/#status-of-python-branches
77
# One build pulls latest versions dynamically
88
- python: 3.6
9-
env: CDH=cdh5 CDH_VERSION=5 PRESTO=RELEASE SQLALCHEMY=sqlalchemy>=1.3.0
9+
env: CDH=cdh5 CDH_VERSION=5 PRESTO=RELEASE TRINO=RELEASE SQLALCHEMY=sqlalchemy>=1.3.0
1010
- python: 3.6
11-
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy>=1.3.0
11+
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 TRINO=351 SQLALCHEMY=sqlalchemy>=1.3.0
1212
- python: 3.5
13-
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy>=1.3.0
13+
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 TRINO=351 SQLALCHEMY=sqlalchemy>=1.3.0
1414
- python: 3.4
15-
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy>=1.3.0
15+
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 TRINO=351 SQLALCHEMY=sqlalchemy>=1.3.0
1616
- python: 2.7
17-
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 SQLALCHEMY=sqlalchemy>=1.3.0
17+
env: CDH=cdh5 CDH_VERSION=5.10.1 PRESTO=0.147 TRINO=351 SQLALCHEMY=sqlalchemy>=1.3.0
1818
install:
1919
- ./scripts/travis-install.sh
2020
- pip install codecov

README.rst

+11-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ DB-API
1717
------
1818
.. code-block:: python
1919
20-
from pyhive import presto # or import hive
20+
from pyhive import presto # or import hive or import trino
2121
cursor = presto.connect('localhost').cursor()
2222
cursor.execute('SELECT * FROM my_awesome_data LIMIT 10')
2323
print cursor.fetchone()
@@ -63,6 +63,8 @@ First install this package to register it with SQLAlchemy (see ``setup.py``).
6363
from sqlalchemy.schema import *
6464
# Presto
6565
engine = create_engine('presto://localhost:8080/hive/default')
66+
# Trino
67+
engine = create_engine('trino://localhost:8080/hive/default')
6668
# Hive
6769
engine = create_engine('hive://localhost:10000/default')
6870
logs = Table('my_awesome_data', MetaData(bind=engine), autoload=True)
@@ -79,12 +81,18 @@ Passing session configuration
7981
# DB-API
8082
hive.connect('localhost', configuration={'hive.exec.reducers.max': '123'})
8183
presto.connect('localhost', session_props={'query_max_run_time': '1234m'})
84+
trino.connect('localhost', session_props={'query_max_run_time': '1234m'})
8285
# SQLAlchemy
8386
create_engine(
8487
'presto://user@host:443/hive',
8588
connect_args={'protocol': 'https',
8689
'session_props': {'query_max_run_time': '1234m'}}
8790
)
91+
create_engine(
92+
'trino://user@host:443/hive',
93+
connect_args={'protocol': 'https',
94+
'session_props': {'query_max_run_time': '1234m'}}
95+
)
8896
create_engine(
8997
'hive://user@host:10000/database',
9098
connect_args={'configuration': {'hive.exec.reducers.max': '123'}},
@@ -102,11 +110,13 @@ Install using
102110

103111
- ``pip install 'pyhive[hive]'`` for the Hive interface,
104112
- ``pip install 'pyhive[presto]'`` for the Presto interface, and
113+
- ``pip install 'pyhive[trino]'`` for the Trino interface.
105114

106115
PyHive works with
107116

108117
- Python 2.7 / Python 3
109118
- For Presto: Presto install
119+
- For Trino: Trino install
110120
- For Hive: `HiveServer2 <https://cwiki.apache.org/confluence/display/Hive/Setting+up+HiveServer2>`_ daemon
111121

112122
Changelog

pyhive/sqlalchemy_trino.py

+73
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""Integration between SQLAlchemy and Trino.
2+
3+
Some code based on
4+
https://github.com/zzzeek/sqlalchemy/blob/rel_0_5/lib/sqlalchemy/databases/sqlite.py
5+
which is released under the MIT license.
6+
"""
7+
8+
from __future__ import absolute_import
9+
from __future__ import unicode_literals
10+
11+
import re
12+
from sqlalchemy import exc
13+
from sqlalchemy import types
14+
from sqlalchemy import util
15+
# TODO shouldn't use mysql type
16+
from sqlalchemy.databases import mysql
17+
from sqlalchemy.engine import default
18+
from sqlalchemy.sql import compiler
19+
from sqlalchemy.sql.compiler import SQLCompiler
20+
21+
from pyhive import trino
22+
from pyhive.common import UniversalSet
23+
from pyhive.sqlalchemy_presto import PrestoDialect, PrestoCompiler, PrestoIdentifierPreparer
24+
25+
class TrinoIdentifierPreparer(PrestoIdentifierPreparer):
26+
pass
27+
28+
29+
_type_map = {
30+
'boolean': types.Boolean,
31+
'tinyint': mysql.MSTinyInteger,
32+
'smallint': types.SmallInteger,
33+
'integer': types.Integer,
34+
'bigint': types.BigInteger,
35+
'real': types.Float,
36+
'double': types.Float,
37+
'varchar': types.String,
38+
'timestamp': types.TIMESTAMP,
39+
'date': types.DATE,
40+
'varbinary': types.VARBINARY,
41+
}
42+
43+
44+
class TrinoCompiler(PrestoCompiler):
45+
pass
46+
47+
48+
class TrinoTypeCompiler(compiler.GenericTypeCompiler):
    """Render SQLAlchemy column types as Trino DDL type names.

    The ``visit_*`` hooks below are type-compiler hooks, so the class derives
    from SQLAlchemy's generic type compiler rather than from the statement
    compiler (``PrestoCompiler``), whose constructor expects a statement and
    therefore cannot be used where a type compiler is required.
    """

    def visit_CLOB(self, type_, **kw):
        """Reject CLOB columns.

        :raises ValueError: always.
        """
        raise ValueError("Trino does not support the CLOB column type.")

    def visit_NCLOB(self, type_, **kw):
        """Reject NCLOB columns.

        :raises ValueError: always.
        """
        raise ValueError("Trino does not support the NCLOB column type.")

    def visit_DATETIME(self, type_, **kw):
        """Reject DATETIME columns.

        :raises ValueError: always.
        """
        raise ValueError("Trino does not support the DATETIME column type.")

    def visit_FLOAT(self, type_, **kw):
        """Render every FLOAT column as ``DOUBLE``."""
        return 'DOUBLE'

    def visit_TEXT(self, type_, **kw):
        """Render TEXT as ``VARCHAR``, keeping the length when one is set."""
        if type_.length:
            return 'VARCHAR({:d})'.format(type_.length)
        else:
            return 'VARCHAR'
66+
67+
68+
class TrinoDialect(PrestoDialect):
    """SQLAlchemy dialect named ``trino``.

    Only the dialect name and the DB-API module are overridden here; all other
    behaviour is inherited from ``PrestoDialect``.
    """

    name = 'trino'

    @classmethod
    def dbapi(cls):
        """Return the DB-API module used by this dialect (``pyhive.trino``)."""
        return trino

pyhive/tests/test_trino.py

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
"""Trino integration tests.
2+
3+
These rely on having a Trino+Hadoop cluster set up.
4+
They also require the tables created by make_test_tables.sh.
5+
"""
6+
7+
from __future__ import absolute_import
8+
from __future__ import unicode_literals
9+
10+
import contextlib
11+
import os
12+
import requests
13+
14+
from pyhive import exc
15+
from pyhive import trino
16+
from pyhive.tests.dbapi_test_case import DBAPITestCase
17+
from pyhive.tests.dbapi_test_case import with_cursor
18+
from pyhive.tests.test_presto import TestPresto
19+
import mock
20+
import unittest
21+
import datetime
22+
23+
_HOST = 'localhost'
24+
_PORT = '18080'
25+
26+
27+
class TestTrino(TestPresto):
28+
__test__ = True
29+
30+
def connect(self):
31+
return trino.connect(host=_HOST, port=_PORT, source=self.id())
32+
33+
def test_bad_protocol(self):
34+
self.assertRaisesRegexp(ValueError, 'Protocol must be',
35+
lambda: trino.connect('localhost', protocol='nonsense').cursor())
36+
37+
def test_escape_args(self):
38+
escaper = trino.TrinoParamEscaper()
39+
40+
self.assertEqual(escaper.escape_args((datetime.date(2020, 4, 17),)),
41+
("date '2020-04-17'",))
42+
self.assertEqual(escaper.escape_args((datetime.datetime(2020, 4, 17, 12, 0, 0, 123456),)),
43+
("timestamp '2020-04-17 12:00:00.123'",))
44+
45+
@with_cursor
46+
def test_description(self, cursor):
47+
cursor.execute('SELECT 1 AS foobar FROM one_row')
48+
self.assertEqual(cursor.description, [('foobar', 'integer', None, None, None, None, True)])
49+
self.assertIsNotNone(cursor.last_query_id)
50+
51+
@with_cursor
52+
def test_complex(self, cursor):
53+
cursor.execute('SELECT * FROM one_row_complex')
54+
# TODO Trino drops the union field
55+
56+
tinyint_type = 'tinyint'
57+
smallint_type = 'smallint'
58+
float_type = 'real'
59+
self.assertEqual(cursor.description, [
60+
('boolean', 'boolean', None, None, None, None, True),
61+
('tinyint', tinyint_type, None, None, None, None, True),
62+
('smallint', smallint_type, None, None, None, None, True),
63+
('int', 'integer', None, None, None, None, True),
64+
('bigint', 'bigint', None, None, None, None, True),
65+
('float', float_type, None, None, None, None, True),
66+
('double', 'double', None, None, None, None, True),
67+
('string', 'varchar', None, None, None, None, True),
68+
('timestamp', 'timestamp', None, None, None, None, True),
69+
('binary', 'varbinary', None, None, None, None, True),
70+
('array', 'array(integer)', None, None, None, None, True),
71+
('map', 'map(integer,integer)', None, None, None, None, True),
72+
('struct', 'row(a integer,b integer)', None, None, None, None, True),
73+
# ('union', 'varchar', None, None, None, None, True),
74+
('decimal', 'decimal(10,1)', None, None, None, None, True),
75+
])
76+
rows = cursor.fetchall()
77+
expected = [(
78+
True,
79+
127,
80+
32767,
81+
2147483647,
82+
9223372036854775807,
83+
0.5,
84+
0.25,
85+
'a string',
86+
'1970-01-01 00:00:00.000',
87+
b'123',
88+
[1, 2],
89+
{"1": 2, "3": 4}, # Trino converts all keys to strings so that they're valid JSON
90+
[1, 2], # struct is returned as a list of elements
91+
# '{0:1}',
92+
'0.1',
93+
)]
94+
self.assertEqual(rows, expected)
95+
# catch unicode/str
96+
self.assertEqual(list(map(type, rows[0])), list(map(type, expected[0])))

pyhive/trino.py

+144
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
"""DB-API implementation backed by Trino
2+
3+
See http://www.python.org/dev/peps/pep-0249/
4+
5+
Many docstrings in this file are based on the PEP, which is in the public domain.
6+
"""
7+
8+
from __future__ import absolute_import
9+
from __future__ import unicode_literals
10+
11+
import logging
12+
13+
import requests
14+
15+
# Make all exceptions visible in this module per DB-API
16+
from pyhive.common import DBAPITypeObject
17+
from pyhive.exc import * # noqa
18+
from pyhive.presto import Connection as PrestoConnection, Cursor as PrestoCursor, PrestoParamEscaper
19+
20+
try: # Python 3
21+
import urllib.parse as urlparse
22+
except ImportError: # Python 2
23+
import urlparse
24+
25+
# PEP 249 module globals
26+
apilevel = '2.0'
27+
threadsafety = 2 # Threads may share the module and connections.
28+
paramstyle = 'pyformat' # Python extended format codes, e.g. ...WHERE name=%(name)s
29+
30+
_logger = logging.getLogger(__name__)
31+
32+
33+
class TrinoParamEscaper(PrestoParamEscaper):
34+
pass
35+
36+
37+
_escaper = TrinoParamEscaper()
38+
39+
40+
def connect(*args, **kwargs):
    """Create a connection to a Trino server.

    Every positional and keyword argument is forwarded unchanged to
    :py:class:`Connection`.

    :returns: a :py:class:`Connection` object.
    """
    connection = Connection(*args, **kwargs)
    return connection
47+
48+
49+
class Connection(PrestoConnection):
    """Trino flavour of the Presto DB-API connection.

    Construction is delegated entirely to ``PrestoConnection``; the only
    difference is that :py:meth:`cursor` hands out the Trino
    :py:class:`Cursor` defined in this module.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``PrestoConnection.__init__``."""
        # Explicit two-argument super(): this module still supports Python 2
        # (see the ``__future__`` imports and the ``urlparse`` fallback),
        # where the zero-argument form raises TypeError.
        super(Connection, self).__init__(*args, **kwargs)

    def cursor(self):
        """Return a new :py:class:`Cursor` object using the connection."""
        # NOTE(review): ``_args`` / ``_kwargs`` are presumably stored by
        # PrestoConnection.__init__ -- confirm against pyhive.presto.
        return Cursor(*self._args, **self._kwargs)
56+
57+
58+
class Cursor(PrestoCursor):
    """Database cursor for Trino, used to manage the context of a fetch operation.

    Cursors are not isolated: a change made through one cursor is immediately
    visible to other cursors and connections.
    """

    def execute(self, operation, parameters=None):
        """Prepare and execute a database operation (query or command).

        :param operation: SQL text; may contain ``%``-style placeholders.
        :param parameters: values substituted into ``operation`` after being
            escaped, or ``None`` to send ``operation`` verbatim.

        Return values are not defined.
        """
        request_headers = {}
        request_headers['X-Trino-Catalog'] = self._catalog
        request_headers['X-Trino-Schema'] = self._schema
        request_headers['X-Trino-Source'] = self._source
        request_headers['X-Trino-User'] = self._username

        # Session properties travel as one comma-separated "name=value" header.
        if self._session_props:
            session_pairs = [
                '{}={}'.format(name, value)
                for name, value in self._session_props.items()
            ]
            request_headers['X-Trino-Session'] = ','.join(session_pairs)

        # Build the final statement before touching cursor state, so that an
        # escaping failure leaves the previous state intact.
        if parameters is not None:
            statement = operation % _escaper.escape_args(parameters)
        else:
            statement = operation

        self._reset_state()
        self._state = self._STATE_RUNNING

        scheme = self._protocol
        netloc = '{}:{}'.format(self._host, self._port)
        endpoint = urlparse.urlunparse((scheme, netloc, '/v1/statement', None, None, None))

        _logger.info('%s', statement)
        _logger.debug("Headers: %s", request_headers)

        reply = self._requests_session.post(
            endpoint,
            data=statement.encode('utf-8'),
            headers=request_headers,
            **self._requests_kwargs
        )
        self._process_response(reply)

    def _process_response(self, response):
        """Update the cursor state from one HTTP response of Trino's REST API.

        Records the next URI, the column metadata, the query id, any session
        property changes requested through response headers, and appends the
        returned rows to the buffered data.

        :raises OperationalError: if the HTTP status is not OK.
        :raises DatabaseError: if the JSON payload carries an ``error`` entry.
        """
        # TODO handle HTTP 503
        status = response.status_code
        if status != requests.codes.ok:
            raise OperationalError(
                "Unexpected status code {}\n{}".format(status, response.content))

        payload = response.json()
        _logger.debug("Got response %s", payload)
        assert self._state == self._STATE_RUNNING, "Should be running if processing response"

        self._nextUri = payload.get('nextUri')
        self._columns = payload.get('columns')
        if 'id' in payload:
            self.last_query_id = payload['id']

        # The server may ask the client to drop or set a session property.
        reply_headers = response.headers
        if 'X-Trino-Clear-Session' in reply_headers:
            cleared_name = reply_headers['X-Trino-Clear-Session']
            self._session_props.pop(cleared_name, None)
        if 'X-Trino-Set-Session' in reply_headers:
            set_name, set_value = reply_headers['X-Trino-Set-Session'].split('=', 1)
            self._session_props[set_name] = set_value

        if 'data' in payload:
            assert self._columns
            rows = payload['data']
            self._decode_binary(rows)
            self._data += map(tuple, rows)

        # No follow-up URI means there is nothing more to fetch.
        if 'nextUri' not in payload:
            self._state = self._STATE_FINISHED
        if 'error' in payload:
            raise DatabaseError(payload['error'])
133+
134+
135+
#
136+
# Type Objects and Constructors
137+
#
138+
139+
140+
# See types in trino-main/src/main/java/com/facebook/trino/tuple/TupleInfo.java
141+
FIXED_INT_64 = DBAPITypeObject(['bigint'])
142+
VARIABLE_BINARY = DBAPITypeObject(['varchar'])
143+
DOUBLE = DBAPITypeObject(['double'])
144+
BOOLEAN = DBAPITypeObject(['boolean'])
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
connector.name=hive-hadoop2
2+
hive.metastore.uri=thrift://localhost:9083
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
coordinator=true
2+
node-scheduler.include-coordinator=true
3+
http-server.http.port=18080
4+
query.max-memory=100MB
5+
query.max-memory-per-node=100MB
6+
discovery-server.enabled=true
7+
discovery.uri=http://localhost:18080

scripts/travis-conf/trino/jvm.config

Whitespace-only changes.

0 commit comments

Comments
 (0)