Skip to content

Commit

Permalink
feat: welcome presto to the suite of tested databases (apache#10498)
Browse files Browse the repository at this point in the history
* Add presto to the CI

Sample test data

Datetime conversion

Sample test data

Fix tests

* TODO to switch to timestamps

* Address feedback

* Update requirements

* Add TODOs

Co-authored-by: bogdan kyryliuk <bogdankyryliuk@dropbox.com>
  • Loading branch information
2 people authored and Ofeknielsen committed Oct 5, 2020
1 parent 2ceb06c commit 8a1be8e
Show file tree
Hide file tree
Showing 24 changed files with 440 additions and 104 deletions.
60 changes: 60 additions & 0 deletions .github/workflows/superset-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,66 @@ jobs:
- name: Test babel extraction
run: flask fab babel-extract --target superset/translations --output superset/translations/messages.pot --config superset/translations/babel.cfg -k _,__,t,tn,tct

test-postgres-presto:
runs-on: ubuntu-18.04
strategy:
matrix:
# run unit tests in multiple version just for fun
python-version: [3.8]
env:
PYTHONPATH: ${{ github.workspace }}
SUPERSET_CONFIG: tests.superset_test_config
REDIS_PORT: 16379
SUPERSET__SQLALCHEMY_DATABASE_URI:
postgresql+psycopg2://superset:superset@127.0.0.1:15432/superset
SUPERSET__SQLALCHEMY_EXAMPLES_URI:
presto://localhost:15433/memory/default
services:
postgres:
image: postgres:10-alpine
env:
POSTGRES_USER: superset
POSTGRES_PASSWORD: superset
ports:
# Use custom ports for services to avoid accidentally connecting to
# GitHub action runner's default installations
- 15432:5432
presto:
image: prestosql/presto:339
env:
POSTGRES_USER: superset
POSTGRES_PASSWORD: superset
ports:
# Use custom ports for services to avoid accidentally connecting to
# GitHub action runner's default installations
- 15433:8080
redis:
image: redis:5-alpine
ports:
- 16379:6379
steps:
- uses: actions/checkout@v2
- name: Setup Python
uses: actions/setup-python@v2.1.1
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
uses: apache-superset/cached-dependencies@b90713b
with:
run: |
apt-get-install
pip-upgrade
pip install -r requirements/testing.txt
setup-postgres
- name: Run celery
run: celery worker --app=superset.tasks.celery_app:app -Ofair -c 2 &
- name: Python unit tests (PostgreSQL)
run: |
./scripts/python_tests.sh
- name: Upload code coverage
run: |
bash <(curl -s https://codecov.io/bash) -cF python
test-postgres:
runs-on: ubuntu-18.04
strategy:
Expand Down
13 changes: 6 additions & 7 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ babel==2.8.0 # via flask-babel
backoff==1.10.0 # via apache-superset
billiard==3.6.3.0 # via celery
bleach==3.1.5 # via apache-superset
boto3==1.14.34 # via tabulator
botocore==1.17.34 # via boto3, s3transfer
boto3==1.14.36 # via tabulator
botocore==1.17.36 # via boto3, s3transfer
brotli==1.0.7 # via flask-compress
cached-property==1.5.1 # via tableschema
cachelib==0.1.1 # via apache-superset
Expand Down Expand Up @@ -55,9 +55,8 @@ geographiclib==1.50 # via geopy
geopy==2.0.0 # via apache-superset
gunicorn==20.0.4 # via apache-superset
humanize==2.5.0 # via apache-superset
idna-ssl==1.1.0 # via aiohttp
idna==2.10 # via email-validator, idna-ssl, requests, yarl
ijson==3.1.post0 # via tabulator
idna==2.10 # via email-validator, requests, yarl
ijson==3.1.1 # via tabulator
importlib-metadata==1.7.0 # via jsonschema, kombu, markdown
isodate==0.6.0 # via apache-superset, tableschema
itsdangerous==1.1.0 # via flask, flask-wtf
Expand Down Expand Up @@ -92,7 +91,7 @@ py==1.9.0 # via retry
pyarrow==0.17.1 # via apache-superset
pycparser==2.20 # via cffi
pydruid==0.6.1 # via apache-superset
pyhive[hive]==0.6.2 # via apache-superset
pyhive[hive]==0.6.3 # via apache-superset
pyjwt==1.7.1 # via flask-appbuilder, flask-jwt-extended
pyparsing==2.4.7 # via packaging
pyrsistent==0.16.0 # via jsonschema
Expand All @@ -119,7 +118,7 @@ tableschema==1.19.2 # via apache-superset
tabulator==1.52.3 # via tableschema
thrift-sasl==0.4.2 # via pyhive
thrift==0.13.0 # via apache-superset, pyhive, thrift-sasl
typing-extensions==3.7.4.2 # via aiohttp, yarl
typing-extensions==3.7.4.2 # via yarl
unicodecsv==0.14.1 # via tableschema, tabulator
urllib3==1.25.10 # via botocore, requests, selenium
vine==1.3.0 # via amqp, celery
Expand Down
5 changes: 2 additions & 3 deletions requirements/integration.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ distlib==0.3.1 # via virtualenv
filelock==3.0.12 # via tox, virtualenv
identify==1.4.25 # via pre-commit
importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv
importlib-resources==3.0.0 # via pre-commit, virtualenv
nodeenv==1.4.0 # via pre-commit
packaging==20.4 # via tox
pip-compile-multi==1.5.8 # via -r requirements/integration.in
Expand All @@ -26,8 +25,8 @@ six==1.15.0 # via packaging, pip-tools, tox, virtualenv
toml==0.10.1 # via pre-commit, tox
toposort==1.5 # via pip-compile-multi
tox==3.18.1 # via -r requirements/integration.in
virtualenv==20.0.29 # via pre-commit, tox
zipp==3.1.0 # via importlib-metadata, importlib-resources
virtualenv==20.0.30 # via pre-commit, tox
zipp==3.1.0 # via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# pip
1 change: 1 addition & 0 deletions requirements/testing.in
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ flask-testing
openapi-spec-validator
openpyxl
parameterized
pyhive[presto]>=0.6.3
pylint
pytest
pytest-cov
Expand Down
3 changes: 2 additions & 1 deletion requirements/testing.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SHA1:785ae7ffcde3cee8ebcc0a839cdb8e61e693d329
# SHA1:e7b15a12c98ccce1cc4b8ee977205f141201b761
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand All @@ -18,6 +18,7 @@ mccabe==0.6.1 # via pylint
more-itertools==8.4.0 # via pytest
openapi-spec-validator==0.2.9 # via -r requirements/testing.in
parameterized==0.7.4 # via -r requirements/testing.in
pyhive[hive,presto]==0.6.3 # via -r requirements/testing.in, apache-superset
pylint==2.5.3 # via -r requirements/testing.in
pytest-cov==2.10.0 # via -r requirements/testing.in
pytest==6.0.1 # via -r requirements/testing.in, pytest-cov
Expand Down
12 changes: 10 additions & 2 deletions superset/examples/birth_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,27 @@ def gen_filter(

def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
pdf = pd.read_json(get_example_data("birth_names.json.gz"))
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
# TODO(bkyryliuk): move load examples data into the pytest fixture
if database.backend == "presto":
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M%:%S")
else:
pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
pdf = pdf.head(100) if sample else pdf

pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
if_exists="replace",
chunksize=500,
dtype={
"ds": DateTime,
# TODO(bkyryliuk): use TIMESTAMP type for presto
"ds": DateTime if database.backend != "presto" else String(255),
"gender": String(16),
"state": String(10),
"name": String(255),
},
method="multi",
index=False,
)
print("Done loading table!")
Expand Down
15 changes: 11 additions & 4 deletions superset/examples/multiformat_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,24 @@ def load_multiformat_time_series(
if not only_metadata and (not table_exists or force):
data = get_example_data("multiformat_time_series.json.gz")
pdf = pd.read_json(data)
# TODO(bkyryliuk): move load examples data into the pytest fixture
if database.backend == "presto":
pdf.ds = pd.to_datetime(pdf.ds, unit="s")
pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d")
pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
pdf.ds2 = pdf.ds2.dt.strftime("%Y-%m-%d %H:%M%:%S")
else:
pdf.ds = pd.to_datetime(pdf.ds, unit="s")
pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")

pdf.ds = pd.to_datetime(pdf.ds, unit="s")
pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
if_exists="replace",
chunksize=500,
dtype={
"ds": Date,
"ds2": DateTime,
"ds": String(255) if database.backend == "presto" else Date,
"ds2": String(255) if database.backend == "presto" else DateTime,
"epoch_s": BigInteger,
"epoch_ms": BigInteger,
"string0": String(100),
Expand Down
11 changes: 8 additions & 3 deletions superset/examples/random_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# under the License.

import pandas as pd
from sqlalchemy import DateTime
from sqlalchemy import DateTime, String

from superset import db
from superset.models.slice import Slice
Expand All @@ -36,13 +36,18 @@ def load_random_time_series_data(
if not only_metadata and (not table_exists or force):
data = get_example_data("random_time_series.json.gz")
pdf = pd.read_json(data)
pdf.ds = pd.to_datetime(pdf.ds, unit="s")
if database.backend == "presto":
pdf.ds = pd.to_datetime(pdf.ds, unit="s")
pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M%:%S")
else:
pdf.ds = pd.to_datetime(pdf.ds, unit="s")

pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
if_exists="replace",
chunksize=500,
dtype={"ds": DateTime},
dtype={"ds": DateTime if database.backend != "presto" else String(255)},
index=False,
)
print("Done loading table!")
Expand Down
11 changes: 9 additions & 2 deletions superset/examples/world_bank.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,26 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-s
data = get_example_data("countries.json.gz")
pdf = pd.read_json(data)
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
pdf.year = pd.to_datetime(pdf.year)
if database.backend == "presto":
pdf.year = pd.to_datetime(pdf.year)
pdf.year = pdf.year.dt.strftime("%Y-%m-%d %H:%M%:%S")
else:
pdf.year = pd.to_datetime(pdf.year)
pdf = pdf.head(100) if sample else pdf

pdf.to_sql(
tbl_name,
database.get_sqla_engine(),
if_exists="replace",
chunksize=50,
dtype={
"year": DateTime(),
# TODO(bkyryliuk): use TIMESTAMP type for presto
"year": DateTime if database.backend != "presto" else String(255),
"country_code": String(3),
"country_name": String(255),
"region": String(255),
},
method="multi",
index=False,
)

Expand Down
7 changes: 7 additions & 0 deletions superset/utils/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,13 @@ def get_example_database() -> "Database":
return get_or_create_db("examples", db_uri)


def get_main_database() -> "Database":
from superset import conf

db_uri = conf.get("SQLALCHEMY_DATABASE_URI")
return get_or_create_db("main", db_uri)


def is_adhoc_metric(metric: Metric) -> bool:
return bool(
isinstance(metric, dict)
Expand Down
1 change: 1 addition & 0 deletions tests/base_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class SupersetTestCase(TestCase):
"sqlite": "main",
"mysql": "superset",
"postgresql": "public",
"presto": "default",
}

maxDiff = -1
Expand Down
Loading

0 comments on commit 8a1be8e

Please sign in to comment.