apache · bkyryliuk · Aug 6, 2020 · Jul 31, 2020 · Aug 4, 2020 · Aug 5, 2020
@@ -92,6 +92,66 @@ jobs:
  - name: Test babel extraction
  run: flask fab babel-extract --target superset/translations --output superset/translations/messages.pot --config superset/translations/babel.cfg -k _,__,t,tn,tct
 
+ test-postgres-presto:
+   # Runs the Python test suite with the metadata database URI pointing at
+   # PostgreSQL and the examples database URI pointing at Presto.
+   runs-on: ubuntu-18.04
+   strategy:
+     matrix:
+       # Only one version for now; add entries here to widen the matrix
+       python-version: ["3.8"]
+   env:
+     PYTHONPATH: ${{ github.workspace }}
+     SUPERSET_CONFIG: tests.superset_test_config
+     REDIS_PORT: 16379
+     SUPERSET__SQLALCHEMY_DATABASE_URI:
+       postgresql+psycopg2://superset:superset@127.0.0.1:15432/superset
+     SUPERSET__SQLALCHEMY_EXAMPLES_URI:
+       presto://localhost:15433/memory/default
+   services:
+     postgres:
+       image: postgres:10-alpine
+       env:
+         POSTGRES_USER: superset
+         POSTGRES_PASSWORD: superset
+       ports:
+         # Use custom ports for services to avoid accidentally connecting to
+         # GitHub action runner's default installations
+         - 15432:5432
+     presto:
+       image: prestosql/presto:339
+       ports:
+         # Use custom ports for services to avoid accidentally connecting to
+         # GitHub action runner's default installations
+         - 15433:8080
+     redis:
+       image: redis:5-alpine
+       ports:
+         - 16379:6379
+   steps:
+     - uses: actions/checkout@v2
+     - name: Setup Python
+       uses: actions/setup-python@v2.1.1
+       with:
+         python-version: ${{ matrix.python-version }}
+     - name: Install dependencies
+       uses: apache-superset/cached-dependencies@b90713b
+       with:
+         run: |
+           apt-get-install
+           pip-upgrade
+           pip install -r requirements/testing.txt
+           setup-postgres
+     # The trailing "&" backgrounds the worker so the following steps can run
+     - name: Run celery
+       run: celery worker --app=superset.tasks.celery_app:app -Ofair -c 2 &
+     - name: Python unit tests (PostgreSQL + Presto)
+       run: |
+         ./scripts/python_tests.sh
+     - name: Upload code coverage
+       run: |
+         bash <(curl -s https://codecov.io/bash) -cF python
+
  test-postgres:
  runs-on: ubuntu-18.04
  strategy:

diff --git a/requirements/base.txt b/requirements/base.txt
@@ -16,8 +16,8 @@ babel==2.8.0 # via flask-babel
 backoff==1.10.0 # via apache-superset
 billiard==3.6.3.0 # via celery
 bleach==3.1.5 # via apache-superset
-boto3==1.14.34 # via tabulator
-botocore==1.17.34 # via boto3, s3transfer
+boto3==1.14.36 # via tabulator
+botocore==1.17.36 # via boto3, s3transfer
 brotli==1.0.7 # via flask-compress
 cached-property==1.5.1 # via tableschema
 cachelib==0.1.1 # via apache-superset
@@ -55,9 +55,8 @@ geographiclib==1.50 # via geopy
 geopy==2.0.0 # via apache-superset
 gunicorn==20.0.4 # via apache-superset
 humanize==2.5.0 # via apache-superset
-idna-ssl==1.1.0 # via aiohttp
-idna==2.10 # via email-validator, idna-ssl, requests, yarl
-ijson==3.1.post0 # via tabulator
+idna==2.10 # via email-validator, requests, yarl
+ijson==3.1.1 # via tabulator
 importlib-metadata==1.7.0 # via jsonschema, kombu, markdown
 isodate==0.6.0 # via apache-superset, tableschema
 itsdangerous==1.1.0 # via flask, flask-wtf
@@ -92,7 +91,7 @@ py==1.9.0 # via retry
 pyarrow==0.17.1 # via apache-superset
 pycparser==2.20 # via cffi
 pydruid==0.6.1 # via apache-superset
-pyhive[hive]==0.6.2 # via apache-superset
+pyhive[hive]==0.6.3 # via apache-superset
 pyjwt==1.7.1 # via flask-appbuilder, flask-jwt-extended
 pyparsing==2.4.7 # via packaging
 pyrsistent==0.16.0 # via jsonschema
@@ -119,7 +118,7 @@ tableschema==1.19.2 # via apache-superset
 tabulator==1.52.3 # via tableschema
 thrift-sasl==0.4.2 # via pyhive
 thrift==0.13.0 # via apache-superset, pyhive, thrift-sasl
-typing-extensions==3.7.4.2 # via aiohttp, yarl
+typing-extensions==3.7.4.2 # via yarl
 unicodecsv==0.14.1 # via tableschema, tabulator
 urllib3==1.25.10 # via botocore, requests, selenium
 vine==1.3.0 # via amqp, celery

diff --git a/requirements/integration.txt b/requirements/integration.txt
@@ -12,7 +12,6 @@ distlib==0.3.1 # via virtualenv
 filelock==3.0.12 # via tox, virtualenv
 identify==1.4.25 # via pre-commit
 importlib-metadata==1.7.0 # via pluggy, pre-commit, tox, virtualenv
-importlib-resources==3.0.0 # via pre-commit, virtualenv
 nodeenv==1.4.0 # via pre-commit
 packaging==20.4 # via tox
 pip-compile-multi==1.5.8 # via -r requirements/integration.in
@@ -26,8 +25,8 @@ six==1.15.0 # via packaging, pip-tools, tox, virtualenv
 toml==0.10.1 # via pre-commit, tox
 toposort==1.5 # via pip-compile-multi
 tox==3.18.1 # via -r requirements/integration.in
-virtualenv==20.0.29 # via pre-commit, tox
-zipp==3.1.0 # via importlib-metadata, importlib-resources
+virtualenv==20.0.30 # via pre-commit, tox
+zipp==3.1.0 # via importlib-metadata
 
 # The following packages are considered to be unsafe in a requirements file:
 # pip
diff --git a/requirements/testing.in b/requirements/testing.in
@@ -20,6 +20,7 @@ flask-testing
 openapi-spec-validator
 openpyxl
 parameterized
+pyhive[presto]>=0.6.3
 pylint
 pytest
 pytest-cov

diff --git a/requirements/testing.txt b/requirements/testing.txt
@@ -1,4 +1,4 @@
-# SHA1:785ae7ffcde3cee8ebcc0a839cdb8e61e693d329
+# SHA1:e7b15a12c98ccce1cc4b8ee977205f141201b761
 #
 # This file is autogenerated by pip-compile-multi
 # To update, run:
@@ -18,6 +18,7 @@ mccabe==0.6.1 # via pylint
 more-itertools==8.4.0 # via pytest
 openapi-spec-validator==0.2.9 # via -r requirements/testing.in
 parameterized==0.7.4 # via -r requirements/testing.in
+pyhive[hive,presto]==0.6.3 # via -r requirements/testing.in, apache-superset
 pylint==2.5.3 # via -r requirements/testing.in
 pytest-cov==2.10.0 # via -r requirements/testing.in
 pytest==6.0.1 # via -r requirements/testing.in, pytest-cov

diff --git a/superset/examples/birth_names.py b/superset/examples/birth_names.py
@@ -54,19 +54,27 @@ def gen_filter(
 
 def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
  pdf = pd.read_json(get_example_data("birth_names.json.gz"))
- pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
+ # TODO(bkyryliuk): move load examples data into the pytest fixture
+ pdf.ds = pd.to_datetime(pdf.ds, unit="ms")
+ if database.backend == "presto":
+     # The presto table declares "ds" as String(255) (see dtype below), so
+     # the parsed timestamps are rendered as text before loading.
+     pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M:%S")
  pdf = pdf.head(100) if sample else pdf
+
  pdf.to_sql(
  tbl_name,
  database.get_sqla_engine(),
  if_exists="replace",
  chunksize=500,
  dtype={
- "ds": DateTime,
+ # TODO(bkyryliuk): use TIMESTAMP type for presto
+ "ds": DateTime if database.backend != "presto" else String(255),
  "gender": String(16),
  "state": String(10),
  "name": String(255),
  },
+ method="multi",
  index=False,
  )
  print("Done loading table!")

diff --git a/superset/examples/multiformat_time_series.py b/superset/examples/multiformat_time_series.py
@@ -44,17 +44,24 @@ def load_multiformat_time_series(
  if not only_metadata and (not table_exists or force):
  data = get_example_data("multiformat_time_series.json.gz")
  pdf = pd.read_json(data)
+ # TODO(bkyryliuk): move load examples data into the pytest fixture
+ pdf.ds = pd.to_datetime(pdf.ds, unit="s")
+ pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
+ if database.backend == "presto":
+     # Both columns are declared as String(255) for presto (see dtype
+     # below), so render them as text: "ds" as a date and "ds2" as a
+     # timestamp.
+     pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d")
+     pdf.ds2 = pdf.ds2.dt.strftime("%Y-%m-%d %H:%M:%S")
 
- pdf.ds = pd.to_datetime(pdf.ds, unit="s")
- pdf.ds2 = pd.to_datetime(pdf.ds2, unit="s")
  pdf.to_sql(
  tbl_name,
  database.get_sqla_engine(),
  if_exists="replace",
  chunksize=500,
  dtype={
- "ds": Date,
- "ds2": DateTime,
+ "ds": String(255) if database.backend == "presto" else Date,
+ "ds2": String(255) if database.backend == "presto" else DateTime,
  "epoch_s": BigInteger,
  "epoch_ms": BigInteger,
  "string0": String(100),

diff --git a/superset/examples/random_time_series.py b/superset/examples/random_time_series.py
@@ -16,7 +16,7 @@
 # under the License.
 
 import pandas as pd
-from sqlalchemy import DateTime
+from sqlalchemy import DateTime, String
 
 from superset import db
 from superset.models.slice import Slice
@@ -36,13 +36,18 @@ def load_random_time_series_data(
  if not only_metadata and (not table_exists or force):
  data = get_example_data("random_time_series.json.gz")
  pdf = pd.read_json(data)
- pdf.ds = pd.to_datetime(pdf.ds, unit="s")
+ pdf.ds = pd.to_datetime(pdf.ds, unit="s")
+ if database.backend == "presto":
+     # "ds" is declared as String(255) for presto (see dtype below), so the
+     # parsed timestamps are rendered as text before loading.
+     pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M:%S")
+
  pdf.to_sql(
  tbl_name,
  database.get_sqla_engine(),
  if_exists="replace",
  chunksize=500,
- dtype={"ds": DateTime},
+ dtype={"ds": DateTime if database.backend != "presto" else String(255)},
  index=False,
  )
  print("Done loading table!")

diff --git a/superset/examples/world_bank.py b/superset/examples/world_bank.py
@@ -53,19 +53,26 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals, too-many-s
  data = get_example_data("countries.json.gz")
  pdf = pd.read_json(data)
  pdf.columns = [col.replace(".", "_") for col in pdf.columns]
- pdf.year = pd.to_datetime(pdf.year)
+ pdf.year = pd.to_datetime(pdf.year)
+ if database.backend == "presto":
+     # "year" is declared as String(255) for presto (see dtype below), so
+     # the parsed timestamps are rendered as text before loading.
+     pdf.year = pdf.year.dt.strftime("%Y-%m-%d %H:%M:%S")
  pdf = pdf.head(100) if sample else pdf
+
  pdf.to_sql(
  tbl_name,
  database.get_sqla_engine(),
  if_exists="replace",
  chunksize=50,
  dtype={
- "year": DateTime(),
+ # TODO(bkyryliuk): use TIMESTAMP type for presto
+ "year": DateTime if database.backend != "presto" else String(255),
  "country_code": String(3),
  "country_name": String(255),
  "region": String(255),
  },
+ method="multi",
  index=False,
  )
 

diff --git a/superset/utils/core.py b/superset/utils/core.py
@@ -1022,6 +1022,13 @@ def get_example_database() -> "Database":
  return get_or_create_db("examples", db_uri)
 
 
+def get_main_database() -> "Database":
+    """Get or create the "main" database from the SQLALCHEMY_DATABASE_URI setting."""
+    from superset import conf
+
+    return get_or_create_db("main", conf.get("SQLALCHEMY_DATABASE_URI"))
+
+
 def is_adhoc_metric(metric: Metric) -> bool:
  return bool(
  isinstance(metric, dict)

diff --git a/tests/base_tests.py b/tests/base_tests.py
@@ -49,6 +49,7 @@ class SupersetTestCase(TestCase):
  "sqlite": "main",
  "mysql": "superset",
  "postgresql": "public",
+ "presto": "default",
  }
 
  maxDiff = -1