Commit

Repo housekeeping
jtcohen6 committed Oct 2, 2021
1 parent f39169e commit 1134e2f
Showing 9 changed files with 69 additions and 84 deletions.
26 changes: 0 additions & 26 deletions .bumpversion-dbt.cfg

This file was deleted.

1 change: 0 additions & 1 deletion .bumpversion.cfg
@@ -27,4 +27,3 @@ first_value = 1
first_value = 1

[bumpversion:file:dbt/adapters/spark/__version__.py]
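The `[bumpversion:file:...]` section above keeps the release number in a single module that both bumpversion and setup.py read. A minimal sketch of that module, assuming its layout (the version value here is illustrative, not the repo's actual number):

```python
# dbt/adapters/spark/__version__.py -- assumed layout; bumpversion rewrites the
# string in place, and setup.py parses it with the regex shown further down
version = "0.21.0"
```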

1 change: 0 additions & 1 deletion .github/workflows/release.yml
@@ -49,7 +49,6 @@ jobs:
source env/bin/activate
sudo apt-get install libsasl2-dev
pip install -r dev_requirements.txt
bumpversion --config-file .bumpversion-dbt.cfg patch --new-version ${{env.version_number}}
bumpversion --config-file .bumpversion.cfg patch --new-version ${{env.version_number}} --allow-dirty
git status
20 changes: 8 additions & 12 deletions README.md
@@ -1,25 +1,21 @@
<p align="center">
<img src="/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
<img src="https://raw.githubusercontent.com/dbt-labs/dbt/ec7dee39f793aa4f7dd3dae37282cc87664813e4/etc/dbt-logo-full.svg" alt="dbt logo" width="500"/>
</p>
<p align="center">
<a href="https://circleci.com/gh/fishtown-analytics/dbt-spark/tree/master">
<a href="https://circleci.com/gh/dbt-labs/dbt-spark/tree/master">
<img src="https://circleci.com/gh/fishtown-analytics/dbt-spark/tree/master.svg?style=svg" alt="CircleCI" />
</a>
<a href="https://community.getdbt.com">
<img src="https://community.getdbt.com/badge.svg" alt="Slack" />
</a>
</p>

# dbt-spark
**[dbt](https://www.getdbt.com/)** enables data analysts and engineers to transform their data using the same practices that software engineers use to build applications.

This plugin ports [dbt](https://getdbt.com) functionality to Spark. It supports running dbt against Spark clusters that are hosted via Databricks (AWS + Azure), Amazon EMR, or Docker.
dbt is the T in ELT. Organize, cleanse, denormalize, filter, rename, and pre-aggregate the raw data in your warehouse so that it's ready for analysis.

We have not tested extensively against older versions of Apache Spark. The plugin uses syntax that requires version 2.2.0 or newer. Some features require Spark 3.0 and/or Delta Lake.
## dbt-spark

### Documentation
For more information on using Spark with dbt, consult the dbt documentation:
- [Spark profile](https://docs.getdbt.com/reference/warehouse-profiles/spark-profile/)
- [Spark specific configs](https://docs.getdbt.com/reference/resource-configs/spark-configs/)
The `dbt-spark` package contains all of the code enabling dbt to work with Apache Spark and Databricks. For more information on using dbt with Spark, consult [the docs](https://docs.getdbt.com/docs/profile-spark).

We have not tested extensively against older versions of Apache Spark. The plugin uses syntax that requires version 2.2.0 or newer. Some features require Spark 3.0 and/or Delta Lake.

### Installation
This plugin can be installed via pip. Depending on your connection method, you need to specify an extra requirement.
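A hedged usage sketch of that install step — the extras names (`ODBC`, `PyHive`, `all`) are assumptions inferred from `odbc_extras`/`pyhive_extras` in setup.py and the `-e.[all]` lines in tox.ini below, not something this diff spells out:

```sh
# pick the extra that matches your connection method (names assumed, see lead-in)
pip install "dbt-spark[ODBC]"     # ODBC driver connections (e.g. Databricks)
pip install "dbt-spark[PyHive]"   # Thrift / HTTP connections
pip install "dbt-spark[all]"      # everything, as the tox test environments do
```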
4 changes: 4 additions & 0 deletions dev_requirements.txt
@@ -1,3 +1,7 @@
# install latest changes in dbt-core
# TODO: how to automate switching from develop to version branches?
git+https://github.com/dbt-labs/dbt.git@develop#egg=dbt-core&subdirectory=core

freezegun==0.3.9
pytest==6.0.2
mock>=1.3.0
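The `git+` line above pins dbt-core to the `develop` branch of the dbt repository using pip's VCS requirement syntax (`#egg=` names the package, `&subdirectory=core` points at the package root inside the monorepo). One way the TODO above might be resolved, sketched with a hypothetical release-branch name rather than anything this commit establishes:

```
# hypothetical: swap the branch ref when cutting a release branch
git+https://github.com/dbt-labs/dbt.git@0.21.latest#egg=dbt-core&subdirectory=core
```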
1 change: 0 additions & 1 deletion etc/dbt-logo-full.svg

This file was deleted.

6 changes: 0 additions & 6 deletions requirements.txt

This file was deleted.

68 changes: 46 additions & 22 deletions setup.py
@@ -1,41 +1,65 @@
#!/usr/bin/env python
from setuptools import find_namespace_packages, setup
import os
import sys
import re

# require python 3.6 or newer
if sys.version_info < (3, 6):
    print('Error: dbt does not support this version of Python.')
    print('Please upgrade to Python 3.6 or higher.')
    sys.exit(1)


# require version of setuptools that supports find_namespace_packages
from setuptools import setup
try:
    from setuptools import find_namespace_packages
except ImportError:
    # the user has a downlevel version of setuptools.
    print('Error: dbt requires setuptools v40.1.0 or higher.')
    print('Please upgrade setuptools with "pip install --upgrade setuptools" '
          'and try again')
    sys.exit(1)


# pull long description from README
this_directory = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(this_directory, 'README.md')) as f:
    long_description = f.read()


package_name = "dbt-spark"


# get this from a separate file
def _dbt_spark_version():
# get this package's version from dbt/adapters/<name>/__version__.py
def _get_plugin_version_dict():
    _version_path = os.path.join(
        this_directory, 'dbt', 'adapters', 'spark', '__version__.py'
    )
    _version_pattern = r'''version\s*=\s*["'](.+)["']'''
    _semver = r'''(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'''
    _pre = r'''((?P<prekind>a|b|rc)(?P<pre>\d+))?'''
    _version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''
    with open(_version_path) as f:
        match = re.search(_version_pattern, f.read().strip())
        if match is None:
            raise ValueError(f'invalid version at {_version_path}')
        return match.group(1)
        return match.groupdict()


package_version = _dbt_spark_version()
description = """The SparkSQL plugin for dbt (data build tool)"""
def _get_plugin_version():
    parts = _get_plugin_version_dict()
    return "{major}.{minor}.{patch}{prekind}{pre}".format(**parts)

dbt_version = '0.20.0rc2'
# the package version should be the dbt version, with maybe some things on the
# ends of it. (0.20.0rc2 vs 0.20.0rc2a1, 0.20.0rc2.1, ...)
if not package_version.startswith(dbt_version):
    raise ValueError(
        f'Invalid setup.py: package_version={package_version} must start with '
        f'dbt_version={dbt_version}'
    )

# require a compatible minor version (~=), prerelease if this is a prerelease
def _get_dbt_core_version():
    parts = _get_plugin_version_dict()
    minor = "{major}.{minor}.0".format(**parts)
    pre = (parts["prekind"]+"1" if parts["prekind"] else "")
    return f"{minor}{pre}"


package_name = "dbt-spark"
package_version = _get_plugin_version()
dbt_core_version = _get_dbt_core_version()
description = """The Apache Spark adapter plugin for dbt"""

odbc_extras = ['pyodbc>=4.0.30']
pyhive_extras = [
@@ -52,14 +76,14 @@ def _dbt_spark_version():
    long_description=long_description,
    long_description_content_type='text/markdown',

    author='Fishtown Analytics',
    author_email='info@fishtownanalytics.com',
    url='https://github.com/fishtown-analytics/dbt-spark',
    author='dbt Labs',
    author_email='info@dbtlabs.com',
    url='https://github.com/dbt-labs/dbt-spark',

    packages=find_namespace_packages(include=['dbt', 'dbt.*']),
    include_package_data=True,
    install_requires=[
        f'dbt-core=={dbt_version}',
        'dbt-core~={}'.format(dbt_core_version),
        'sqlparams>=3.0.0',
    ],
    extras_require={
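Putting the new helpers together: the regex splits the plugin version into semver plus optional prerelease parts, `_get_plugin_version` reassembles it for the package metadata, and `_get_dbt_core_version` derives the compatible-release pin. A standalone sketch of that flow with an illustrative version string (regex copied from the diff; the sample value is an assumption):

```python
import re

_semver = r'''(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'''
_pre = r'''((?P<prekind>a|b|rc)(?P<pre>\d+))?'''
_version_pattern = fr'''version\s*=\s*["']{_semver}{_pre}["']'''

# illustrative contents of dbt/adapters/spark/__version__.py
sample = 'version = "0.21.0rc2"'

parts = re.search(_version_pattern, sample).groupdict()

# package version: exactly what the version module declares
package_version = "{major}.{minor}.{patch}{prekind}{pre}".format(**parts)  # '0.21.0rc2'

# dbt-core pin: same minor series, first prerelease if this is a prerelease
minor = "{major}.{minor}.0".format(**parts)                                # '0.21.0'
pre = parts["prekind"] + "1" if parts["prekind"] else ""                   # 'rc1'
print(f"dbt-core~={minor}{pre}")                                           # dbt-core~=0.21.0rc1
```

So a prerelease of the plugin (for example `0.21.0rc2`) resolves to `dbt-core~=0.21.0rc1` — any dbt-core in the 0.21.x series at or past its first release candidate — rather than the exact `dbt-core==<version>` pin the removed code enforced.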
26 changes: 11 additions & 15 deletions tox.ini
@@ -8,44 +8,41 @@ basepython = python3
commands = /bin/bash -c '$(which flake8) --select=E,W,F --ignore=W504 dbt/'
passenv = DBT_INVOCATION_ENV
deps =
    -r{toxinidir}/dev_requirements.txt
    -rdev_requirements.txt

[testenv:unit]
basepython = python3
commands = /bin/bash -c '{envpython} -m pytest -v {posargs} test/unit'
passenv = DBT_INVOCATION_ENV
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/dev_requirements.txt
    -rdev_requirements.txt
    -e.[all]

[testenv:integration-spark-databricks-http]
basepython = python3
commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-http.dbtspec'
passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/dev_requirements.txt
    -e.
    -rdev_requirements.txt
    -e.[all]

[testenv:integration-spark-databricks-odbc-cluster]
basepython = python3
commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-cluster.dbtspec'
           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_cluster {posargs} -n4 test/custom/*'
passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_CLUSTER_NAME DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/dev_requirements.txt
    -e.
    -rdev_requirements.txt
    -e.[all]

[testenv:integration-spark-databricks-odbc-sql-endpoint]
basepython = python3
commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-databricks-odbc-sql-endpoint.dbtspec'
           /bin/bash -c '{envpython} -m pytest -v -m profile_databricks_sql_endpoint {posargs} -n4 test/custom/*'
passenv = DBT_DATABRICKS_HOST_NAME DBT_DATABRICKS_ENDPOINT DBT_DATABRICKS_TOKEN DBT_INVOCATION_ENV ODBC_DRIVER
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/dev_requirements.txt
    -e.
    -rdev_requirements.txt
    -e.[all]


[testenv:integration-spark-thrift]
@@ -54,6 +51,5 @@ commands = /bin/bash -c '{envpython} -m pytest -v test/integration/spark-thrift.
           /bin/bash -c '{envpython} -m pytest -v -m profile_apache_spark {posargs} -n4 test/custom/*'
passenv = DBT_INVOCATION_ENV
deps =
    -r{toxinidir}/requirements.txt
    -r{toxinidir}/dev_requirements.txt
    -e.
    -rdev_requirements.txt
    -e.[all]
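These tox environments separate fast checks from cluster-backed integration runs. A hedged invocation sketch — environment names are taken from the `[testenv:...]` sections above, and the credential variables mirror each `passenv` list with placeholder values:

```sh
pip install tox

# unit tests: no Spark cluster required
tox -e unit

# Databricks ODBC cluster suite: export the variables listed in passenv first
export DBT_DATABRICKS_HOST_NAME=... DBT_DATABRICKS_CLUSTER_NAME=...
export DBT_DATABRICKS_TOKEN=... ODBC_DRIVER=...
tox -e integration-spark-databricks-odbc-cluster
```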
