diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index aea5834004..dc34752f95 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -13,9 +13,9 @@ /amundsen-kube-helm/ @feng-tao @jornh @javamonkey79 -common/*.py @feng-tao @jinhyukchang @allisonsuarez @verdan @bolkedebruin @mgorsk1 -databuilder/*.py @feng-tao @jinhyukchang @allisonsuarez @dikshathakur3119 +common/*.py @feng-tao @jinhyukchang @allisonsuarez @verdan @bolkedebruin @mgorsk1 @dorianj +databuilder/*.py @feng-tao @jinhyukchang @allisonsuarez @dikshathakur3119 @dorianj @verdan frontend/amundsen_application/static/ @Golodhros @ttannis @allisonsuarez @dikshathakur3119 @feng-tao @dorianj -frontend/*.py @ttannis @feng-tao @dikshathakur3119 @allisonsuarez @dorianj +frontend/*.py @ttannis @feng-tao @dikshathakur3119 @allisonsuarez @dorianj @verdan metadata/*.py @feng-tao @jinhyukchang @allisonsuarez @dikshathakur3119 @verdan @bolkedebruin @mgorsk1 search/*.py @feng-tao @jinhyukchang @allisonsuarez @dikshathakur3119 @verdan @bolkedebruin @mgorsk1 diff --git a/.github/workflows/deploy_docs.yml b/.github/workflows/deploy_docs.yml index 239147aa11..292b9fb451 100644 --- a/.github/workflows/deploy_docs.yml +++ b/.github/workflows/deploy_docs.yml @@ -2,14 +2,14 @@ name: Publish docs via GitHub Pages on: push: branches: - - master + - main jobs: build: name: Deploy docs runs-on: ubuntu-latest steps: - - name: Checkout master + - name: Checkout main uses: actions/checkout@v1 - name: Checkout submodules using a PAT run: | diff --git a/.github/workflows/publish_release.yml b/.github/workflows/deploy_release.yml similarity index 58% rename from .github/workflows/publish_release.yml rename to .github/workflows/deploy_release.yml index d123d8456a..c657059f53 100644 --- a/.github/workflows/publish_release.yml +++ b/.github/workflows/deploy_release.yml @@ -1,15 +1,19 @@ -name: Publish Release +name: Publish Release to PyPi on: - push: - branches: - - master - paths: - # triggered when version is bumped - - "setup.py" 
- - "CHANGELOG.md" -jobs: release: + types: [published] +jobs: + gather-info: runs-on: ubuntu-latest + steps: + - name: Get module for release + id: module_folder + run: echo "::set-output name=module_folder::$(awk '{print $1}' <<< "${{ github.event.release.name }}")" + outputs: + module_folder: ${{ steps.module_folder.outputs.module_folder }} + push-to-pypi: + runs-on: ubuntu-latest + needs: gather-info steps: - name: Checkout code uses: actions/checkout@v2 @@ -17,31 +21,20 @@ jobs: uses: actions/setup-python@v1 with: python-version: 3.6 - - name: Get version - id: vers - run: | - pip install python-semantic-release - echo ::set-output name=version::$(semantic-release print-version --current) - - name: Create release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ steps.vers.outputs.version }} - release_name: New Features & Bug Fixes - body_path: ./CHANGELOG.md - name: Add wheel dependency run: pip install wheel - name: Generate dist run: python setup.py sdist bdist_wheel + working-directory: ${{ needs.gather-info.outputs.module_folder }} - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@master with: user: __token__ password: ${{ secrets.pypi_password }} + packages_dir: ${{ needs.gather-info.outputs.module_folder }}/dist build-and-publish-docker-image: name: Build and publish docker image + needs: gather-info runs-on: ubuntu-18.04 steps: - name: Checkout @@ -51,5 +44,7 @@ jobs: - name: Publish to Registry for latest if: success() run: make build-push-image-latest + working-directory: ${{ needs.gather-info.outputs.module_folder }} - name: Publish to Registry for version run: make build-push-image-version + working-directory: ${{ needs.gather-info.outputs.module_folder }} diff --git a/.github/workflows/metadata_pull_request.yml b/.github/workflows/metadata_pull_request.yml index 81081a8981..dd829b480e 100644 --- a/.github/workflows/metadata_pull_request.yml +++ 
b/.github/workflows/metadata_pull_request.yml @@ -24,4 +24,5 @@ jobs: working-directory: ./metadata - name: Codecov uses: codecov/codecov-action@v1 - working-directory: ./metadata + with: + directory: ./metadata diff --git a/.mailmap b/.mailmap new file mode 100644 index 0000000000..a111dbef5d --- /dev/null +++ b/.mailmap @@ -0,0 +1,170 @@ + Aarni Alasaarela + Adam Boscarino + Adam Boscarino + Adam Weiss + Alagappan + Alex Kompos + Alex Levene <37639249+alevene@users.noreply.github.com> + Allison Doami + Allison Suarez Miranda <22477579+allisonsuarez@users.noreply.github.com> + Alyssa + Anders Hafreager + Andrew Ciambrone + Anna Elisabeth + Anurag870 + Arjun Landes + ArthurEgide + Ashutosh Sanzgiri + Austin Gibbons + Ayush Chauhan <10010065+ayushchauhan0811@users.noreply.github.com> + Ayush Chauhan <57655135+ayush-san@users.noreply.github.com> + Ben Rifkind + Bongso, Suriyanto <1008875+suriyanto@users.noreply.github.com> + Bruno Nirello <30416677+bruno-nirello@users.noreply.github.com> + Bryan Fox <39674+bryfox@users.noreply.github.com> + Carter Landis + Craig Rosie + Cristiano Perez + Damon + Daniel + Daniel Mateus Pires + Daniel Mateus Pires + Daniel Won + Daren Sin + Dave Cameron + Dave Cameron + David Manukian <53464203+davidmanukian@users.noreply.github.com> + Derek + Diksha Thakur <44571635+dikshathakur3119@users.noreply.github.com> + Dmitriy Kunitskiy + Dorian Johnson <2019@dorianj.net> + Dorian Johnson <2020@dorianj.net> + Florent Bédécarrats + Gee + Gerard Toonstra + Guido Schmutz + Hugo Hobson + Itay Levy + Jack Leow + Jacob Kim + Jacob Kim + Jacob Scherffenberg + Jakub Hettler + James Davidheiser + Jan Kyri + Jin Hyuk Chang + Jin Hyuk Chang + Joao Correia + John Cheng + Jonas Brunsgaard + Jonathan Hehir + Jonathan Vieira + Joseph Atkins-Turkish + Joseph Atkins-Turkish + Josh Howard + Josh Stanfield + Joshua Hoskins + Junaid Effendi + Junda Yang + Justin Kenny <63694689+JustinKenny@users.noreply.github.com> + Kent Murra + Lingkai Kong + Louis Simoneau + 
Luis Garcés-Erice + Lukas Tarasevicius + Luke Lowery + Lyman Gillispie + Madison Bowden <52679885+madison-ookla@users.noreply.github.com> + Madison Swain-Bowden <52679885+madison-ookla@users.noreply.github.com> + Magnus Runesson + Marcos Iglesias <190833+Golodhros@users.noreply.github.com> + Mariusz Strzelecki + Mariusz Strzelecki + Mark Grover + Mark Grover + Markus Thurner + Martin Traverso + Mikhail Ivanov + Nanne + Nanne + Nathan Lawrence <54294432+nathanlawrence-asana@users.noreply.github.com> + Neil Shah + Nipun Agarwal + Norman Cheng + Oliver Marshall + Pablo Torre + Palash Das + PaschalisDim <48329279+PaschalisDim@users.noreply.github.com> + Pathuri Sai Harish + Paul Bergeron + Pedro Gonçalves Rossi Rodrigues + Philippe Mizrahi <39501569+philippemizrahi@users.noreply.github.com> + Radhakrishna Pemmasani + Rafael A. Mejia + Robert Yi + Ryan Lieu + Sahithi Reddy Velma <46659774+sahithi03@users.noreply.github.com> + Sarah Harvey + Sarthak Killedar + Saulius Grigaliunas + Shaun Elliott + Shenghu Yang + Shuichiro MAKIGAKI + Simon Brugman + Stacey Watro + Tamika Tannis + Tamika Tannis + Tao Feng + Tao Feng + Tao feng + Travis Roesner + Van-Duyet Le <5009534+duyetdev@users.noreply.github.com> + Verdan Mahmood + Wonyeong Choi + Xuan <65048031+crazy-2020@users.noreply.github.com> + Yeshwanth Kumar + Youngil Choi <56865498+duddlf23@users.noreply.github.com> + Zack Wu <39970089+instazackwu@users.noreply.github.com> + abhinay04 <68522211+abhinay04@users.noreply.github.com> + allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> + aparna-cimpress <83464696+aparna-cimpress@users.noreply.github.com> + bluefa + bogo96 + bolkedebruin + christina stead + christina stead + damon09273@gmail.com + dechoma + dependabot-preview[bot] <27856297+dependabot-preview[bot]@users.noreply.github.com> + dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> + dwarszawski + friendtocephalopods <52580251+friendtocephalopods@users.noreply.github.com> + 
giomerlin <56041379+giomerlin@users.noreply.github.com> + github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> + hoskins <52580251+friendtocephalopods@users.noreply.github.com> + iamtodor + joeybaer <35610156+joeybaer@users.noreply.github.com> + jonhehir + jornh + lelandtran + lexisantoro <43476866+lexisantoro@users.noreply.github.com> + louis <1160507+naiaden@users.noreply.github.com> + lyft-metaservice-3 <37090125+lyft-metaservice-3@users.noreply.github.com> + mborukhava <43936420+mborukhava@users.noreply.github.com> + mgorsk1 + minsu + rogertangcn + rosejcday <16930240+rosejcday@users.noreply.github.com> + salilk957 <49253336+salilk957@users.noreply.github.com> + samshuster + sewardgw + sewardgw + sinkuladis + sshuster + sterlins + tianr.zhou <61992569+tianruzhou-db@users.noreply.github.com> + tianru zhou <61560998+UnbalancedShard@users.noreply.github.com> + verdan + zhmin + Đặng Minh Dũng + 之东 <0x4ec7@gmail.com> diff --git a/README.md b/README.md index 96d10f7ee7..c494d38f61 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,7 @@ Please visit [Architecture](./docs/architecture.md#architecture) for Amundsen ar - [Apache Druid](https://druid.apache.org/) - [Apache Hive](https://hive.apache.org/) - CSV +- [dbt](https://www.getdbt.com/) - [Delta Lake](https://delta.io/) - [Google BigQuery](https://cloud.google.com/bigquery) - [IBM DB2](https://www.ibm.com/analytics/db2) diff --git a/common/amundsen_common/models/lineage.py b/common/amundsen_common/models/lineage.py index 1015b3a24d..50d8e5131b 100644 --- a/common/amundsen_common/models/lineage.py +++ b/common/amundsen_common/models/lineage.py @@ -16,6 +16,7 @@ class LineageItem: source: str # database this resource is from badges: Optional[List[Badge]] = None usage: Optional[int] = None # statistic to sort lineage items by + parent: Optional[str] = None # key of the parent entity, used to create the relationships in graph class LineageItemSchema(AttrsSchema): diff --git a/common/setup.py 
b/common/setup.py index 686f52eea2..57b367a9c6 100644 --- a/common/setup.py +++ b/common/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup -__version__ = '0.10.0' +__version__ = '0.10.1' setup( name='amundsen-common', diff --git a/databuilder/README.md b/databuilder/README.md index a3ae80315e..8b57c1d1fe 100644 --- a/databuilder/README.md +++ b/databuilder/README.md @@ -468,6 +468,39 @@ job = DefaultJob( job.launch() ``` +#### [DbtExtractor](https://github.com/amundsen-io/amundsendatabuilder/blob/master/databuilder/extractor/dbt_extractor.py "DbtExtractor") +This extractor utilizes the [dbt](https://www.getdbt.com/ "dbt") output files `catalog.json` and `manifest.json` to extract metadata and ingest it into Amundsen. The `catalog.json` and `manifest.json` can both be generated by running `dbt docs generate` in your dbt project. Visit the [dbt artifacts page](https://docs.getdbt.com/reference/artifacts/dbt-artifacts "dbt artifacts") for more information. + +The `DbtExtractor` can currently create the following: + +- Tables and their definitions +- Columns and their definitions +- Table level lineage +- dbt tags (as Amundsen badges or tags) +- Table Sources (e.g. 
link to GitHub where the dbt template resides) + +```python +job_config = ConfigFactory.from_dict({ + # Required args + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': 'snowflake', + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': catalog_file_loc, # File location + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': json.dumps(manifest_data), # JSON Dumped object + # Optional args + f'extractor.dbt.{DbtExtractor.SOURCE_URL}': 'https://github.com/your-company/your-repo/tree/main', + f'extractor.dbt.{DbtExtractor.EXTRACT_TABLES}': True, + f'extractor.dbt.{DbtExtractor.EXTRACT_DESCRIPTIONS}': True, + f'extractor.dbt.{DbtExtractor.EXTRACT_TAGS}': True, + f'extractor.dbt.{DbtExtractor.IMPORT_TAGS_AS}': 'badges', + f'extractor.dbt.{DbtExtractor.EXTRACT_LINEAGE}': True, +}) +job = DefaultJob( + conf=job_config, + task=DefaultTask( + extractor=DbtExtractor(), + loader=AnyLoader())) +job.launch() +``` + ### [RestAPIExtractor](./databuilder/extractor/restapi/rest_api_extractor.py) A extractor that utilizes [RestAPIQuery](#rest-api-query) to extract data. RestAPIQuery needs to be constructed ([example](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_extractor.py#L40)) and needs to be injected to RestAPIExtractor. @@ -493,7 +526,7 @@ Dashboard description (Report description) Other information such as report run, owner, chart name, query name is in separate extractor. -It calls two APIs ([spaces API](https://mode.com/developer/api-reference/management/spaces/#listSpaces) and [reports API](https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace)) joining together. +It calls two APIs ([spaces API](https://mode.com/developer/discovery-api/analytics/spaces/) and [reports API](https://mode.com/developer/discovery-api/analytics/reports/)) joining together. You can create Databuilder job config like this. 
```python @@ -506,8 +539,7 @@ relationship_files_folder = '{tmp_folder}/relationships'.format(tmp_folder=tmp_f job_config = ConfigFactory.from_dict({ 'extractor.mode_dashboard.{}'.format(ORGANIZATION): organization, - 'extractor.mode_dashboard.{}'.format(MODE_ACCESS_TOKEN): mode_token, - 'extractor.mode_dashboard.{}'.format(MODE_PASSWORD_TOKEN): mode_password, + 'extractor.mode_dashboard.{}'.format(MODE_BEARER_TOKEN): mode_bearer_token, 'loader.filesystem_csv_neo4j.{}'.format(FsNeo4jCSVLoader.NODE_DIR_PATH): node_files_folder, 'loader.filesystem_csv_neo4j.{}'.format(FsNeo4jCSVLoader.RELATION_DIR_PATH): relationship_files_folder, 'loader.filesystem_csv_neo4j.{}'.format(FsNeo4jCSVLoader.SHOULD_DELETE_CREATED_DIR): True, @@ -541,8 +573,7 @@ task = DefaultTask(extractor=extractor, job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) job = DefaultJob(conf=job_config, @@ -562,8 +593,7 @@ task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) job = DefaultJob(conf=job_config, @@ -585,6 +615,7 @@ job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) job = 
DefaultJob(conf=job_config, @@ -604,8 +635,7 @@ task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) job = DefaultJob(conf=job_config, @@ -625,8 +655,7 @@ task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, + '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) job = DefaultJob(conf=job_config, @@ -635,37 +664,17 @@ job = DefaultJob(conf=job_config, job.launch() ``` -#### [ModeDashboardChartsExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_extractor.py) -A Extractor that extracts Mode Dashboard charts. Currently, Mode API response schema is undocumented and hard to be used for the schema seems different per chart type. For this reason, this extractor can only extracts Chart token, and Chart URL at this point. +#### [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_batch_extractor.py) +A Extractor that extracts Mode Dashboard charts metadata. You can create Databuilder job config like this. (configuration related to loader and publisher is omitted as it is mostly the same. Please take a look at this [example](#ModeDashboardExtractor) for the configuration that holds loader and publisher. 
-```python -extractor = ModeDashboardChartsExtractor() -task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) - -job_config = ConfigFactory.from_dict({ - '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, -}) - -job = DefaultJob(conf=job_config, - task=task, - publisher=Neo4jCsvPublisher()) -job.launch() -``` - -If your organization's mode account supports discovery feature(paid feature), you could leverage [ModeDashboardChartsBatchExtractor](./databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py) which does a batch call to mode API which is more performant. You need to generate a bearer account based on the API instruction. - ```python extractor = ModeDashboardChartsBatchExtractor() task = DefaultTask(extractor=extractor, loader=FsNeo4jCSVLoader()) job_config = ConfigFactory.from_dict({ '{}.{}'.format(extractor.get_scope(), ORGANIZATION): organization, - '{}.{}'.format(extractor.get_scope(), MODE_ACCESS_TOKEN): mode_token, - '{}.{}'.format(extractor.get_scope(), MODE_PASSWORD_TOKEN): mode_password, '{}.{}'.format(extractor.get_scope(), MODE_BEARER_TOKEN): mode_bearer_token, }) @@ -675,6 +684,7 @@ job = DefaultJob(conf=job_config, job.launch() ``` + #### [ModeDashboardUserExtractor](./databuilder/extractor/dashboard/mode_analytics/mode_dashboard_user_extractor.py) A Extractor that extracts Mode user_id and then update User node. diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py deleted file mode 100644 index f3145d75b3..0000000000 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/batch/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright Contributors to the Amundsen project. 
-# SPDX-License-Identifier: Apache-2.0 diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_batch_extractor.py similarity index 96% rename from databuilder/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py rename to databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_batch_extractor.py index d8cb3c1b7a..91c790a46b 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/batch/mode_dashboard_charts_batch_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_batch_extractor.py @@ -76,11 +76,13 @@ def _build_restapi_query(self) -> RestApiQuery: 'chart_id', 'chart_name', 'chart_type'] + max_record_size = 1000 chart_batch_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=chart_url_template, params=params, json_path=json_path, pagination_json_path=json_path, field_names=field_names, - skip_no_result=True) + skip_no_result=True, + max_record_size=max_record_size) return chart_batch_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_extractor.py deleted file mode 100644 index d276fdef4c..0000000000 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_charts_extractor.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright Contributors to the Amundsen project. 
-# SPDX-License-Identifier: Apache-2.0 - -import logging -from typing import Any, List - -from pyhocon import ConfigFactory, ConfigTree - -from databuilder import Scoped -from databuilder.extractor.base_extractor import Extractor -from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils -from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_query import RestApiQuery -from databuilder.transformer.base_transformer import ChainedTransformer, Transformer -from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel -from databuilder.transformer.template_variable_substitution_transformer import ( - FIELD_NAME, TEMPLATE, TemplateVariableSubstitutionTransformer, -) - -LOGGER = logging.getLogger(__name__) - - -class ModeDashboardChartsExtractor(Extractor): - """ - A Extractor that extracts Dashboard charts - - """ - - def init(self, conf: ConfigTree) -> None: - self._conf = conf - - restapi_query = self._build_restapi_query() - self._extractor = ModeDashboardUtils.create_mode_rest_api_extractor( - restapi_query=restapi_query, - conf=self._conf - ) - - # Constructing URL using resource path via TemplateVariableSubstitutionTransformer - transformers: List[Transformer] = [] - chart_url_transformer = TemplateVariableSubstitutionTransformer() - chart_url_transformer.init( - conf=Scoped.get_scoped_conf(self._conf, chart_url_transformer.get_scope()).with_fallback( - ConfigFactory.from_dict({FIELD_NAME: 'chart_url', - TEMPLATE: 'https://app.mode.com{chart_url}'}))) - - transformers.append(chart_url_transformer) - - dict_to_model_transformer = DictToModel() - dict_to_model_transformer.init( - conf=Scoped.get_scoped_conf(self._conf, dict_to_model_transformer.get_scope()).with_fallback( - ConfigFactory.from_dict( - {MODEL_CLASS: 'databuilder.models.dashboard.dashboard_chart.DashboardChart'}))) - transformers.append(dict_to_model_transformer) - 
- self._transformer = ChainedTransformer(transformers=transformers) - - def extract(self) -> Any: - record = self._extractor.extract() - if not record: - return None - - return self._transformer.transform(record=record) - - def get_scope(self) -> str: - return 'extractor.mode_dashboard_chart' - - def _build_restapi_query(self) -> RestApiQuery: - """ - Build REST API Query. To get Mode Dashboard last execution, it needs to call three APIs (spaces API, reports - API, and run API) joining together. - :return: A RestApiQuery that provides Mode Dashboard execution (run) - """ - - spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) - - # Reports - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - report_url_template = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' - json_path = '(_embedded.reports[*].token)' - field_names = ['dashboard_id'] - reports_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=report_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) - - queries_url_template = 'https://app.mode.com/api/{organization}/reports/{dashboard_id}/queries' - json_path = '_embedded.queries[*].[token,name]' - field_names = ['query_id', 'query_name'] - query_names_query = RestApiQuery(query_to_join=reports_query, url=queries_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) - - charts_url_template = 'https://app.mode.com/api/{organization}/reports/{dashboard_id}/queries/{query_id}/charts' - json_path = '(_embedded.charts[*].token) | (_embedded.charts[*]._links.report_viz_web.href)' - field_names = ['chart_id', 'chart_url'] - chart_names_query = RestApiQuery(query_to_join=query_names_query, url=charts_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True, - 
json_path_contains_or=True) - - return chart_names_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_executions_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_executions_extractor.py index 12be4bda1e..d417e9b93e 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_executions_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_executions_extractor.py @@ -10,6 +10,7 @@ from databuilder.extractor.base_extractor import Extractor from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery +from databuilder.rest_api.rest_api_failure_handlers import HttpFailureSkipOnStatus from databuilder.rest_api.rest_api_query import RestApiQuery from databuilder.transformer.base_transformer import ChainedTransformer, Transformer from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel @@ -69,6 +70,11 @@ def _build_restapi_query(self) -> RestApiQuery: :return: A RestApiQuery that provides Mode Dashboard execution (run) """ + # TODO: revise this extractor once Mode team provides last execution timestamp in reports discovery api + # https://mode.com/developer/discovery-api/analytics/reports/ + # Once we can fully switch to Mode discovery api, + # the performance of this extractor will be dramatically increased. 
+ spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) params = ModeDashboardUtils.get_auth_params(conf=self._conf) @@ -77,10 +83,16 @@ def _build_restapi_query(self) -> RestApiQuery: url = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' json_path = '(_embedded.reports[*].token) | (_embedded.reports[*]._links.last_run.href)' field_names = ['dashboard_id', 'last_run_resource_path'] + + # the spaces_query is authenticated with a bearer token, + # which returns spaces that may be beyond access of the user calling Mode main api. + # When this happens, 404 will be returned and hence should be skipped. + failure_handler = HttpFailureSkipOnStatus(status_codes_to_skip={404}) last_run_resource_path_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=url, params=params, json_path=json_path, field_names=field_names, skip_no_result=True, - json_path_contains_or=True) + json_path_contains_or=True, + can_skip_failure=failure_handler.can_skip_failure) # https://mode.com/developer/api-reference/analytics/report-runs/#getReportRun url = 'https://app.mode.com{last_run_resource_path}' diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_extractor.py index 1a86e971bd..ad0eaf1846 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_extractor.py @@ -10,7 +10,7 @@ from databuilder.extractor.base_extractor import Extractor from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_query import RestApiQuery +from databuilder.rest_api.query_merger import QueryMerger from 
databuilder.transformer.base_transformer import ChainedTransformer, Transformer from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel from databuilder.transformer.template_variable_substitution_transformer import ( @@ -86,24 +86,29 @@ def extract(self) -> Any: def get_scope(self) -> str: return 'extractor.mode_dashboard' - def _build_restapi_query(self) -> RestApiQuery: + def _build_restapi_query(self) -> ModePaginatedRestApiQuery: """ - Build REST API Query. To get Mode Dashboard metadata, it needs to call two APIs (spaces API and reports - API) joining together. + Build REST API Query to get Mode Dashboard metadata :return: A RestApiQuery that provides Mode Dashboard metadata """ + seed_query = ModeDashboardUtils.get_seed_query(conf=self._conf) + params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True) - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - reports_url_template = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' + # Reports + # https://mode.com/developer/discovery-api/analytics/reports/ + url = 'https://app.mode.com/batch/{organization}/reports' + json_path = 'reports[*].[token, name, description, created_at, space_token]' + field_names = ['dashboard_id', 'dashboard_name', 'description', 'created_timestamp', 'dashboard_group_id'] + max_record_size = 1000 + pagination_json_path = 'reports[*]' spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) + query_merger = QueryMerger(query_to_merge=spaces_query, merge_key='dashboard_group_id') + + reports_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=url, params=params, + json_path=json_path, field_names=field_names, + skip_no_result=True, max_record_size=max_record_size, + pagination_json_path=pagination_json_path, + query_merger=query_merger) - # Reports - # JSONPATH expression. 
it goes into array which is located in _embedded.reports and then extracts token, name, - # and description - json_path = '_embedded.reports[*].[token,name,description,created_at]' - field_names = ['dashboard_id', 'dashboard_name', 'description', 'created_timestamp'] - reports_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=reports_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) return reports_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_modified_timestamp_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_modified_timestamp_extractor.py index 0ef0d20308..63a1b3e6d1 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_modified_timestamp_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_modified_timestamp_extractor.py @@ -11,7 +11,6 @@ from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.extractor.restapi.rest_api_extractor import STATIC_RECORD_DICT from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_query import RestApiQuery from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel from databuilder.transformer.timestamp_string_to_epoch import FIELD_NAME, TimestampStringToEpoch @@ -42,23 +41,25 @@ def init(self, conf: ConfigTree) -> None: def get_scope(self) -> str: return 'extractor.mode_dashboard_last_modified_timestamp_execution' - def _build_restapi_query(self) -> RestApiQuery: + def _build_restapi_query(self) -> ModePaginatedRestApiQuery: """ - Build REST API Query. To get Mode Dashboard last modified timestamp, it needs to call two APIs (spaces API, - and reports API) joining together. 
+ Build REST API Query to get Mode Dashboard last modified timestamp :return: A RestApiQuery that provides Mode Dashboard last successful execution (run) """ - spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) + seed_query = ModeDashboardUtils.get_seed_query(conf=self._conf) + params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True) # Reports - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - url = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' - json_path = '_embedded.reports[*].[token,edited_at]' - field_names = ['dashboard_id', 'last_modified_timestamp'] - last_modified_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=url, params=params, + # https://mode.com/developer/discovery-api/analytics/reports/ + url = 'https://app.mode.com/batch/{organization}/reports' + json_path = 'reports[*].[token, space_token, edited_at]' + field_names = ['dashboard_id', 'dashboard_group_id', 'last_modified_timestamp'] + max_record_size = 1000 + pagination_json_path = 'reports[*]' + last_modified_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=url, params=params, json_path=json_path, field_names=field_names, - skip_no_result=True) + skip_no_result=True, max_record_size=max_record_size, + pagination_json_path=pagination_json_path) return last_modified_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_successful_executions_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_successful_executions_extractor.py index ac4ed5fd1a..d477750387 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_successful_executions_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_last_successful_executions_extractor.py @@ -12,7 +12,6 
@@ from databuilder.extractor.restapi.rest_api_extractor import STATIC_RECORD_DICT from databuilder.models.dashboard.dashboard_execution import DashboardExecution from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_query import RestApiQuery LOGGER = logging.getLogger(__name__) @@ -39,23 +38,25 @@ def init(self, conf: ConfigTree) -> None: def get_scope(self) -> str: return 'extractor.mode_dashboard_last_successful_execution' - def _build_restapi_query(self) -> RestApiQuery: + def _build_restapi_query(self) -> ModePaginatedRestApiQuery: """ - Build REST API Query. To get Mode Dashboard last successful execution, it needs to call two APIs (spaces API, - and reports API) joining together. + Build REST API Query to get Mode Dashboard last successful execution metadata. :return: A RestApiQuery that provides Mode Dashboard last successful execution (run) """ - spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) + seed_query = ModeDashboardUtils.get_seed_query(conf=self._conf) + params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True) # Reports - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - url = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' - json_path = '_embedded.reports[*].[token,last_successfully_run_at]' - field_names = ['dashboard_id', 'execution_timestamp'] - last_successful_run_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=url, params=params, + # https://mode.com/developer/discovery-api/analytics/reports/ + url = 'https://app.mode.com/batch/{organization}/reports' + json_path = 'reports[*].[token, space_token, last_successfully_run_at]' + field_names = ['dashboard_id', 'dashboard_group_id', 'execution_timestamp'] + max_record_size = 1000 + pagination_json_path = 'reports[*]' + 
last_successful_run_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=url, params=params, json_path=json_path, field_names=field_names, - skip_no_result=True) + skip_no_result=True, max_record_size=max_record_size, + pagination_json_path=pagination_json_path) return last_successful_run_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_owner_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_owner_extractor.py index a616ea0dad..067f20e64d 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_owner_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_owner_extractor.py @@ -10,8 +10,6 @@ from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.extractor.restapi.rest_api_extractor import MODEL_CLASS from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_failure_handlers import HttpFailureSkipOnStatus -from databuilder.rest_api.rest_api_query import RestApiQuery LOGGER = logging.getLogger(__name__) @@ -41,37 +39,25 @@ def extract(self) -> Any: def get_scope(self) -> str: return 'extractor.mode_dashboard_owner' - def _build_restapi_query(self) -> RestApiQuery: + def _build_restapi_query(self) -> ModePaginatedRestApiQuery: """ - Build REST API Query. To get Mode Dashboard owner, it needs to call three APIs (spaces API, reports - API, and user API) joining together. 
+ Build REST API Query to get Mode Dashboard owner :return: A RestApiQuery that provides Mode Dashboard owner """ - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - report_url_template = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' - - # https://mode.com/developer/api-reference/management/users/ - creator_url_template = 'https://app.mode.com{creator_resource_path}' - - spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) + seed_query = ModeDashboardUtils.get_seed_query(conf=self._conf) + params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True) # Reports - json_path = '(_embedded.reports[*].token) | (_embedded.reports[*]._links.creator.href)' - field_names = ['dashboard_id', 'creator_resource_path'] - creator_resource_path_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=report_url_template, - params=params, - json_path=json_path, field_names=field_names, - skip_no_result=True, - json_path_contains_or=True) - - json_path = 'email' - field_names = ['email'] - failure_handler = HttpFailureSkipOnStatus(status_codes_to_skip={404}) - owner_email_query = RestApiQuery(query_to_join=creator_resource_path_query, url=creator_url_template, - params=params, - json_path=json_path, field_names=field_names, skip_no_result=True, - can_skip_failure=failure_handler.can_skip_failure) - - return owner_email_query + # https://mode.com/developer/discovery-api/analytics/reports/ + url = 'https://app.mode.com/batch/{organization}/reports' + json_path = 'reports[*].[token, space_token, creator_email]' + field_names = ['dashboard_id', 'dashboard_group_id', 'email'] + max_record_size = 1000 + pagination_json_path = 'reports[*]' + creator_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=url, params=params, + json_path=json_path, field_names=field_names, + skip_no_result=True, 
max_record_size=max_record_size, + pagination_json_path=pagination_json_path) + + return creator_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py index c3debcff6b..b80b02750d 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_queries_extractor.py @@ -10,7 +10,6 @@ from databuilder.extractor.base_extractor import Extractor from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery -from databuilder.rest_api.rest_api_query import RestApiQuery from databuilder.transformer.base_transformer import ChainedTransformer, Transformer from databuilder.transformer.dict_to_model import MODEL_CLASS, DictToModel from databuilder.transformer.regex_str_replace_transformer import ( @@ -77,28 +76,25 @@ def extract(self) -> Any: def get_scope(self) -> str: return 'extractor.mode_dashboard_query' - def _build_restapi_query(self) -> RestApiQuery: + def _build_restapi_query(self) -> ModePaginatedRestApiQuery: """ - Build REST API Query. To get Mode Dashboard last execution, it needs to call three APIs (spaces API, reports - API, and queries API) joining together. 
+ Build REST API Query to get Mode Dashboard queries :return: A RestApiQuery that provides Mode Dashboard execution (run) """ - spaces_query = ModeDashboardUtils.get_spaces_query_api(conf=self._conf) - params = ModeDashboardUtils.get_auth_params(conf=self._conf) - - # Reports - # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace - url = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' - json_path = '(_embedded.reports[*].token)' - field_names = ['dashboard_id'] - reports_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=url, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) - - queries_url_template = 'https://app.mode.com/api/{organization}/reports/{dashboard_id}/queries' - json_path = '_embedded.queries[*].[token,name,raw_query]' - field_names = ['query_id', 'query_name', 'query_text'] - query_names_query = RestApiQuery(query_to_join=reports_query, url=queries_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) + seed_query = ModeDashboardUtils.get_seed_query(conf=self._conf) + params = ModeDashboardUtils.get_auth_params(conf=self._conf, discover_auth=True) + + # Queries + # https://mode.com/developer/discovery-api/analytics/queries/ + url = 'https://app.mode.com/batch/{organization}/queries' + json_path = 'queries[*].[report_token, space_token, token, name, raw_query]' + field_names = ['dashboard_id', 'dashboard_group_id', 'query_id', 'query_name', 'query_text'] + max_record_size = 1000 + pagination_json_path = 'queries[*]' + query_names_query = ModePaginatedRestApiQuery(query_to_join=seed_query, url=url, params=params, + json_path=json_path, field_names=field_names, + skip_no_result=True, max_record_size=max_record_size, + pagination_json_path=pagination_json_path) return query_names_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py 
b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py index a84b7343db..6aa1a5d0df 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_usage_extractor.py @@ -9,6 +9,7 @@ from databuilder.extractor.base_extractor import Extractor from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_utils import ModeDashboardUtils from databuilder.rest_api.mode_analytics.mode_paginated_rest_api_query import ModePaginatedRestApiQuery +from databuilder.rest_api.rest_api_failure_handlers import HttpFailureSkipOnStatus from databuilder.rest_api.rest_api_query import RestApiQuery LOGGER = logging.getLogger(__name__) @@ -39,6 +40,11 @@ def _build_restapi_query(self) -> RestApiQuery: :return: A RestApiQuery that provides Mode Dashboard metadata """ + # TODO: revise this extractor once Mode team provides total_views_count in reports discovery api + # https://mode.com/developer/discovery-api/analytics/reports/ + # Once we can fully switch to Mode discovery api, + # the performance of this extractor will be dramatically increased. + # https://mode.com/developer/api-reference/analytics/reports/#listReportsInSpace reports_url_template = 'https://app.mode.com/api/{organization}/spaces/{dashboard_group_id}/reports' @@ -50,6 +56,12 @@ def _build_restapi_query(self) -> RestApiQuery: # and view_count json_path = '_embedded.reports[*].[token,view_count]' field_names = ['dashboard_id', 'accumulated_view_count'] + + # the spaces_query is authenticated with a bearer token, + # which returns spaces that may be beyond access of the user calling Mode main api. + # When this happens, 404 will be returned and hence should be skipped. 
+ failure_handler = HttpFailureSkipOnStatus(status_codes_to_skip={404}) reports_query = ModePaginatedRestApiQuery(query_to_join=spaces_query, url=reports_url_template, params=params, - json_path=json_path, field_names=field_names, skip_no_result=True) + json_path=json_path, field_names=field_names, skip_no_result=True, + can_skip_failure=failure_handler.can_skip_failure) return reports_query diff --git a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py index 92cbeea41c..aae4465eea 100644 --- a/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py +++ b/databuilder/databuilder/extractor/dashboard/mode_analytics/mode_dashboard_utils.py @@ -19,6 +19,13 @@ class ModeDashboardUtils(object): + @staticmethod + def get_seed_query(conf: ConfigTree) -> BaseRestApiQuery: + # Seed query record for next query api to join with + seed_record = [{'organization': conf.get_string(ORGANIZATION)}] + seed_query = RestApiQuerySeed(seed_record=seed_record) + return seed_query + @staticmethod def get_spaces_query_api(conf: ConfigTree) -> BaseRestApiQuery: """ @@ -32,8 +39,7 @@ def get_spaces_query_api(conf: ConfigTree) -> BaseRestApiQuery: spaces_url_template = 'https://app.mode.com/batch/{organization}/spaces' # Seed query record for next query api to join with - seed_record = [{'organization': conf.get_string(ORGANIZATION)}] - seed_query = RestApiQuerySeed(seed_record=seed_record) + seed_query = ModeDashboardUtils.get_seed_query(conf=conf) # mode_bearer_token must be provided in the conf # the token is required to access discovery endpoint diff --git a/databuilder/databuilder/extractor/dbt_extractor.py b/databuilder/databuilder/extractor/dbt_extractor.py new file mode 100644 index 0000000000..97f4b4c6a5 --- /dev/null +++ b/databuilder/databuilder/extractor/dbt_extractor.py @@ -0,0 +1,328 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 + +import json +import logging +import os +from enum import Enum +from typing import ( + Dict, Iterator, List, Optional, Tuple, Union, +) + +from pyhocon import ConfigTree + +from databuilder.extractor.base_extractor import Extractor +from databuilder.models.badge import Badge, BadgeMetadata +from databuilder.models.table_lineage import TableLineage +from databuilder.models.table_metadata import ColumnMetadata, TableMetadata +from databuilder.models.table_source import TableSource + +LOGGER = logging.getLogger(__name__) + + +DBT_CATALOG_REQD_KEYS = ['nodes'] +DBT_MANIFEST_REQD_KEYS = ['nodes', 'child_map'] +DBT_MODEL_TYPE = 'model' +DBT_MODEL_PREFIX = 'model.' +DBT_TEST_PREFIX = 'test.' + + +class DBT_TAG_AS(Enum): + BADGE = 'badge' + TAG = 'tag' + + +class DBT_MODEL_NAME_KEY(Enum): + ALIAS = 'alias' + NAME = 'name' + + +class InvalidDbtInputs(Exception): + pass + + +class DbtExtractor(Extractor): + """ + Extracts metadata from the dbt manifest.json and catalog.json files. + Both a manifest and a catalog must be provided. + The location of the file or a valid Python dictionary of the content + can be provided. + + Currently the following assets are extracted from these files: + + - Tables + - Columns + - Definitions + - Table lineage + - Tags (converted to Amundsen Badges) + + Additional metadata exists and may be extracted in the future: + + - Run / test outcomes + - Freshness + - Hooks (as programmatic description?) + - Analysis (as queries for a table??)
+ - Table / column level statistics + - Table comments (as programmatic descriptions) + """ + + CATALOG_JSON = "catalog_json" + MANIFEST_JSON = "manifest_json" + DATABASE_NAME = 'database_name' + + # Dbt Extract Options + EXTRACT_TABLES = 'extract_tables' + EXTRACT_DESCRIPTIONS = 'extract_descriptions' + EXTRACT_TAGS = 'extract_tags' + EXTRACT_LINEAGE = 'extract_lineage' + SOURCE_URL = 'source_url' # Base source code URL for the repo containing dbt workflows + IMPORT_TAGS_AS = 'import_tags_as' + SCHEMA_FILTER = 'schema_filter' # Only extract dbt models from this schema, defaults to all models + MODEL_NAME_KEY = 'model_name_key' # Whether to use the "name" or "alias" from dbt as the Amundsen name + + # Makes all db, schema, cluster and table names lowercase. This is done so that table metadata from dbt + # with the default key `Sample://Cluster/Schema/Table` match existing metadata that Amundsen has from + # the database, which may be `sample://cluster/schema/table`. + # Most databases that dbt integrates with either use lowercase by default in the information schema + # or the default Amundsen extractor applies a `lower(...)` function to the result (e.g. snowflake). + # However, Amundsen does not currently enforce a consistent convention and some databases do support + # upper and lowercase naming conventions (e.g. Redshift). It may be useful to set this False in the + # config if the table metadata keys in your database are not all lowercase and to then use a transformer to + # properly format the string value.
+ FORCE_TABLE_KEY_LOWER = 'force_table_key_lower' + + def init(self, conf: ConfigTree) -> None: + self._conf = conf + self._database_name = conf.get_string(DbtExtractor.DATABASE_NAME) + self._dbt_manifest = conf.get_string(DbtExtractor.MANIFEST_JSON) + self._dbt_catalog = conf.get_string(DbtExtractor.CATALOG_JSON) + # Extract options + self._extract_tables = conf.get_bool(DbtExtractor.EXTRACT_TABLES, True) + self._extract_descriptions = conf.get_bool(DbtExtractor.EXTRACT_DESCRIPTIONS, True) + self._extract_tags = conf.get_bool(DbtExtractor.EXTRACT_TAGS, True) + self._extract_lineage = conf.get_bool(DbtExtractor.EXTRACT_LINEAGE, True) + self._source_url = conf.get_string(DbtExtractor.SOURCE_URL, None) + self._force_table_key_lower = conf.get_bool(DbtExtractor.FORCE_TABLE_KEY_LOWER, True) + self._dbt_tag_as = DBT_TAG_AS(conf.get_string(DbtExtractor.IMPORT_TAGS_AS, DBT_TAG_AS.BADGE.value)) + self._schema_filter = conf.get_string(DbtExtractor.SCHEMA_FILTER, '') + self._model_name_key = DBT_MODEL_NAME_KEY( + conf.get_string(DbtExtractor.MODEL_NAME_KEY, DBT_MODEL_NAME_KEY.NAME.value)).value + self._clean_inputs() + + self._extract_iter: Union[None, Iterator] = None + + def get_scope(self) -> str: + return "extractor.dbt" + + def _validate_catalog(self) -> None: + # Load the catalog file if needed and run basic validation on the content + try: + self._dbt_catalog = json.loads(self._dbt_catalog) + except Exception: + try: + with open(self._dbt_catalog, 'rb') as f: + self._dbt_catalog = json.load(f) + except Exception as e: + raise InvalidDbtInputs( + 'Invalid content for a dbt catalog was provided. Must be a valid Python ' + 'dictionary or the location of a file. 
Error received: %s' % e + ) + for catalog_key in DBT_CATALOG_REQD_KEYS: + if catalog_key not in self._dbt_catalog: + raise InvalidDbtInputs( + "Dbt catalog file must contain keys: %s, found keys: %s" + % (DBT_CATALOG_REQD_KEYS, self._dbt_catalog.keys()) + ) + + def _validate_manifest(self) -> None: + # Load the manifest file if needed and run basic validation on the content + try: + self._dbt_manifest = json.loads(self._dbt_manifest) + except Exception: + try: + with open(self._dbt_manifest, 'rb') as f: + self._dbt_manifest = json.load(f) + except Exception as e: + raise InvalidDbtInputs( + 'Invalid content for a dbt manifest was provided. Must be a valid Python ' + 'dictionary or the location of a file. Error received: %s' % e + ) + for manifest_key in DBT_MANIFEST_REQD_KEYS: + if manifest_key not in self._dbt_manifest: + raise InvalidDbtInputs( + "Dbt manifest file must contain keys: %s, found keys: %s" + % (DBT_MANIFEST_REQD_KEYS, self._dbt_manifest.keys()) + ) + + def _clean_inputs(self) -> None: + """ + Validates the dbt input to ensure that both of the inputs + (manifest.json and catalog.json) are provided. Once validated, the + inputs are sanitized to ensure that the `self._dbt_manifest` and + `self._dbt_catalog` are valid Python dictionaries. + """ + if self._database_name is None: + raise InvalidDbtInputs( + 'Must provide a database name that corresponds to this dbt catalog and manifest.' + ) + + if not self._dbt_manifest or not self._dbt_catalog: + raise InvalidDbtInputs( + 'Must provide a dbt manifest file and dbt catalog file.' + ) + + self._validate_catalog() + self._validate_manifest() + + def extract(self) -> Union[TableMetadata, None]: + """ + Returns the next record produced from the dbt manifest and catalog, or + None once every record has been returned. The underlying iterator + (see `_get_extract_iter`) yields the following record types: + + 1. TableMetadata for each dbt model found in both manifest and catalog + 2. BadgeMetadata for the model tags (when tags are imported as badges) + 3. TableSource linking the model to its source file (when a source URL + is configured)
+ 4. TableLineage built from the manifest `child_map` (when lineage + extraction is enabled) + """ + if not self._extract_iter: + self._extract_iter = self._get_extract_iter() + try: + return next(self._extract_iter) + except StopIteration: + return None + + def _default_sanitize(self, s: str) -> str: + """ + Default function that will be run to convert the value of a string to lowercase. + """ + if s and self._force_table_key_lower: + s = s.lower() + return s + + def _get_table_descriptions(self, manifest_content: Dict) -> Tuple[Optional[str], Optional[str]]: + """ + Gets a description and description source for a table. + """ + desc, desc_src = None, None + if self._extract_descriptions: + desc = manifest_content.get('description') + desc_src = 'dbt description' + return desc, desc_src + + def _get_table_tags_badges(self, manifest_content: Dict) -> Tuple[Optional[List[str]], Optional[List[str]]]: + """ + Gets tags or badges for a given table. At most one of these values will not be null. + """ + tags, tbl_badges = None, None + if self._extract_tags: + if self._dbt_tag_as == DBT_TAG_AS.BADGE: + tbl_badges = manifest_content.get('tags') + elif self._dbt_tag_as == DBT_TAG_AS.TAG: + tags = manifest_content.get('tags') + return tags, tbl_badges + + def _can_yield_schema(self, schema: str) -> bool: + """ + Whether or not the schema can be yielded based on the schema filter criteria. + """ + return (not self._schema_filter) or (self._schema_filter.lower() == schema.lower()) + + def _get_extract_iter(self) -> Iterator[Union[TableMetadata, BadgeMetadata, TableSource, TableLineage]]: + """ + Generates the extract iterator for all of the model types created by the dbt files.
+ """ + dbt_id_to_table_key = {} + for tbl_node, manifest_content in self._dbt_manifest['nodes'].items(): + + if manifest_content['resource_type'] == DBT_MODEL_TYPE and tbl_node in self._dbt_catalog['nodes']: + LOGGER.info( + 'Extracting dbt {}.{}'.format(manifest_content['schema'], manifest_content[self._model_name_key]) + ) + + catalog_content = self._dbt_catalog['nodes'][tbl_node] + + tbl_columns: List[ColumnMetadata] = self._get_column_values( + manifest_columns=manifest_content['columns'], catalog_columns=catalog_content['columns'] + ) + + desc, desc_src = self._get_table_descriptions(manifest_content) + tags, tbl_badges = self._get_table_tags_badges(manifest_content) + + tbl_metedata = TableMetadata( + database=self._default_sanitize(self._database_name), + # The dbt "database" is the cluster here + cluster=self._default_sanitize(manifest_content['database']), + schema=self._default_sanitize(manifest_content['schema']), + name=self._default_sanitize(manifest_content[self._model_name_key]), + is_view=catalog_content['metadata']['type'] == 'VIEW', + columns=tbl_columns, + tags=tags, + description=desc, + description_source=desc_src + ) + # Keep track for Lineage + dbt_id_to_table_key[tbl_node] = tbl_metedata._get_table_key() + + # Optionally filter schemas in the output + yield_schema = self._can_yield_schema(manifest_content['schema']) + + if self._extract_tables and yield_schema: + yield tbl_metedata + + if self._extract_tags and tbl_badges and yield_schema: + yield BadgeMetadata(start_label=TableMetadata.TABLE_NODE_LABEL, + start_key=tbl_metedata._get_table_key(), + badges=[Badge(badge, 'table') for badge in tbl_badges]) + + if self._source_url and yield_schema: + yield TableSource(db_name=tbl_metedata.database, + cluster=tbl_metedata.cluster, + schema=tbl_metedata.schema, + table_name=tbl_metedata.name, + source=os.path.join(self._source_url, manifest_content.get('original_file_path'))) + + if self._extract_lineage: + for upstream, downstreams in 
self._dbt_manifest['child_map'].items(): + valid_downstreams = [ + dbt_id_to_table_key[k] for k in downstreams if k.startswith(DBT_MODEL_PREFIX) + ] + if valid_downstreams: + yield TableLineage( + table_key=dbt_id_to_table_key[upstream], + downstream_deps=valid_downstreams + ) + + def _get_column_values(self, manifest_columns: Dict, catalog_columns: Dict) -> List[ColumnMetadata]: + """ + Iterates over the columns in the manifest file and creates a `ColumnMetadata` object + with the combined information from the manifest file as well as the catalog file. + + :params manifest_columns: A dictionary of values from the manifest.json, the keys + are column names and the values are column metadata + :params catalog_columns: A dictionary of values from the catalog.json, the keys + are column names and the values are column metadata + :returns: A list of `ColumnMetadata` in Amundsen. + """ + tbl_columns = [] + for manifest_col_name, manifest_col_content in manifest_columns.items(): + catalog_col_content = catalog_columns.get(manifest_col_name.upper(), {}) + + col_desc = None + if self._extract_descriptions: + col_desc = manifest_col_content.get('description') + + # Only extract column-level tags IF converting to badges, Amundsen does not have column-level tags + badges = None + if self._extract_tags and self._dbt_tag_as == DBT_TAG_AS.BADGE: + badges = manifest_col_content.get('tags') + + col_metadata = ColumnMetadata( + name=self._default_sanitize(catalog_col_content['name']), + description=col_desc, + col_type=catalog_col_content['type'], + sort_order=catalog_col_content['index'], + badges=badges + ) + tbl_columns.append(col_metadata) + return tbl_columns diff --git a/databuilder/databuilder/rest_api/query_merger.py b/databuilder/databuilder/rest_api/query_merger.py new file mode 100644 index 0000000000..0d99fcb2ba --- /dev/null +++ b/databuilder/databuilder/rest_api/query_merger.py @@ -0,0 +1,88 @@ +# Copyright Contributors to the Amundsen project.
+# SPDX-License-Identifier: Apache-2.0 + +from typing import Any, Dict + +from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery + + +class QueryMerger: + """ + To be used in rest_api_query + + e.g. Assuming + query_merger = QueryMerger(query_to_merge=spaces_query, merge_key='dashboard_group_id'), + where spaces_query yields a record like + { + 'dashboard_group_id': 'ggg', + 'dashboard_group': 'dashboard group' + }, + and RestApiQuery's inner_rest_api_query.execute() returns a record of + { + 'dashboard_id': 'ddd', + 'dashboard_name': 'dashboard_name', + 'dashboard_group_id': 'ggg' + }, + the final yield record_dict from RestApiQuery(query_merger=query_merger).execute() will be + { + 'dashboard_id': 'ddd', + 'dashboard_name': 'dashboard_name', + 'dashboard_group_id': 'ggg', + 'dashboard_group': 'dashboard group' + } + """ + def __init__(self, + query_to_merge: BaseRestApiQuery, + merge_key: str, + ) -> None: + self._query_to_merge = query_to_merge + self._merge_key = merge_key + self._computed_query_result: Dict[Any, Any] = dict() + + def merge_into(self, record_dict: dict) -> None: + """ + Merge results of query_to_merge into record_dict. Update record_dict in place. + + :param record_dict: the record_dict to be updated in place + :return: + """ + # compute query results for easy lookup later to find the exact record to merge + if not self._computed_query_result: + self._computed_query_result = self._compute_query_result() + + value_of_merge_key = record_dict.get(self._merge_key) + record_dict_to_merge = self._computed_query_result.get(value_of_merge_key) + if not record_dict_to_merge: + raise Exception(f'{self._merge_key} {value_of_merge_key} not found in query_to_merge results') + record_dict.update(record_dict_to_merge) + + def _compute_query_result(self) -> Dict[Any, Any]: + """ + Transform the query result to a dictionary. 
+ + Assuming merge_key is 'dashboard_id' and self._query_to_merge.execute() returns + iter([{'dashboard_id': 'd1', 'dashboard_name': 'n1'}, {'dashboard_id': 'd2', 'dashboard_name': 'n2'}]), + the returned dict of this method will look like + { + 'd1': {'dashboard_id': 'd1', 'dashboard_name': 'n1'}, + 'd2': {'dashboard_id': 'd2', 'dashboard_name': 'n2'}, + } + + :return: a dictionary + """ + computed_query_results: Dict[Any, Any] = dict() + iterator = self._query_to_merge.execute() + + while True: + try: + record = next(iterator) + except StopIteration: + return computed_query_results + + value_of_merge_key = record[self._merge_key] + if value_of_merge_key in computed_query_results: + raise Exception( + f'merge_key {self._merge_key} value {value_of_merge_key} is not unique across the query results' + ) + + computed_query_results[value_of_merge_key] = record diff --git a/databuilder/databuilder/rest_api/rest_api_query.py b/databuilder/databuilder/rest_api/rest_api_query.py index 9282333945..7b5d010d88 100644 --- a/databuilder/databuilder/rest_api/rest_api_query.py +++ b/databuilder/databuilder/rest_api/rest_api_query.py @@ -12,6 +12,7 @@ from retrying import retry from databuilder.rest_api.base_rest_api_query import BaseRestApiQuery +from databuilder.rest_api.query_merger import QueryMerger LOGGER = logging.getLogger(__name__) @@ -62,6 +63,7 @@ def __init__(self, skip_no_result: bool = False, json_path_contains_or: bool = False, can_skip_failure: Callable = None, + query_merger: QueryMerger = None, **kwargs: Any ) -> None: """ @@ -101,6 +103,7 @@ def __init__(self, :param json_path_contains_or: JSON Path expression accepts | ( OR ) operation, mostly to extract values in different level. In this case, JSON Path will extract the value from first expression and then second, and so forth. + :param query_merger: to update record_dict yield by this rest api query with the query_merger's query results. 
Example: JSON result: @@ -130,6 +133,7 @@ def __init__(self, self._json_path_contains_or = json_path_contains_or self._can_skip_failure = can_skip_failure self._more_pages = False + self._query_merger = query_merger def execute(self) -> Iterator[Dict[str, Any]]: # noqa: C901 self._authenticate() @@ -177,10 +181,12 @@ def execute(self) -> Iterator[Dict[str, Any]]: # noqa: C901 if not sub_record or len(sub_record) != len(self._field_names): # skip the record continue - record_dict = copy.deepcopy(record_dict) + new_record_dict = copy.deepcopy(record_dict) for field_name in self._field_names: - record_dict[field_name] = sub_record.pop(0) - yield record_dict + new_record_dict[field_name] = sub_record.pop(0) + if self._query_merger: + self._query_merger.merge_into(new_record_dict) + yield new_record_dict self._post_process(response) diff --git a/databuilder/example/sample_data/dbt/catalog.json b/databuilder/example/sample_data/dbt/catalog.json new file mode 100644 index 0000000000..19d346f701 --- /dev/null +++ b/databuilder/example/sample_data/dbt/catalog.json @@ -0,0 +1 @@ +{"metadata": {"dbt_schema_version": "https://schemas.getdbt.com/dbt/catalog/v1.json", "dbt_version": "0.19.1", "generated_at": "2021-04-20T19:44:44.998264Z", "invocation_id": "2c6e4c85-3912-40cc-b613-0a50c65e32bc", "env": {}}, "nodes": {"model.dbt_demo.fact_catalog_returns": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "FACT_CATALOG_RETURNS", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": {"CR_ITEM_SK": {"type": "NUMBER", "index": 1, "name": "CR_ITEM_SK", "comment": null}, "CR_CALL_CENTER_SK": {"type": "NUMBER", "index": 2, "name": "CR_CALL_CENTER_SK", "comment": null}, "ITEM_CALL_CENTER_LOSS": {"type": "NUMBER", "index": 3, "name": "ITEM_CALL_CENTER_LOSS", "comment": null}, "ITEM_CALL_CENTER_CNT": {"type": "NUMBER", "index": 4, "name": "ITEM_CALL_CENTER_CNT", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": 
false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.fact_catalog_returns"}, "model.dbt_demo.fact_third_party_performance": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "FACT_THIRD_PARTY_PERFORMANCE", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": {"COMPANY_ID": {"type": "NUMBER", "index": 1, "name": "COMPANY_ID", "comment": null}, "COMPANY_NAME": {"type": "TEXT", "index": 2, "name": "COMPANY_NAME", "comment": null}, "TOTAL_LOSS": {"type": "NUMBER", "index": 3, "name": "TOTAL_LOSS", "comment": null}, "AVG_ITEM_LOSS": {"type": "NUMBER", "index": 4, "name": "AVG_ITEM_LOSS", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.fact_third_party_performance"}, "model.dbt_demo.fact_warehouse_inventory": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "FACT_WAREHOUSE_INVENTORY", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": {"DT": {"type": "NUMBER", "index": 1, "name": "DT", "comment": null}, "W_WAREHOUSE_ID": {"type": "TEXT", "index": 2, "name": "W_WAREHOUSE_ID", "comment": null}, "INVENTORY_VALUE": {"type": "NUMBER", "index": 3, "name": "INVENTORY_VALUE", "comment": null}, "INVENTORY_COST": {"type": "NUMBER", "index": 4, "name": "INVENTORY_COST", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.fact_warehouse_inventory"}, "model.dbt_demo.raw_inventory_item_warehouse": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "RAW_INVENTORY_ITEM_WAREHOUSE", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": {"DT": {"type": "NUMBER", "index": 1, 
"name": "DT", "comment": null}, "W_WAREHOUSE_ID": {"type": "TEXT", "index": 2, "name": "W_WAREHOUSE_ID", "comment": null}, "ITEM_ID": {"type": "NUMBER", "index": 3, "name": "ITEM_ID", "comment": null}, "QUANTITY": {"type": "NUMBER", "index": 4, "name": "QUANTITY", "comment": null}, "ITEM_PRICE": {"type": "NUMBER", "index": 5, "name": "ITEM_PRICE", "comment": null}, "ITEM_COST": {"type": "NUMBER", "index": 6, "name": "ITEM_COST", "comment": null}, "INVENTORY_VALUE": {"type": "NUMBER", "index": 7, "name": "INVENTORY_VALUE", "comment": null}, "INVENTORY_COST": {"type": "NUMBER", "index": 8, "name": "INVENTORY_COST", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.raw_inventory_item_warehouse"}, "model.dbt_demo.raw_inventory_value": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "RAW_INVENTORY_VALUE", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": {"DT": {"type": "NUMBER", "index": 1, "name": "DT", "comment": null}, "QUANTITY": {"type": "NUMBER", "index": 2, "name": "QUANTITY", "comment": null}, "ITEM_PRICE": {"type": "NUMBER", "index": 3, "name": "ITEM_PRICE", "comment": null}, "ITEM_COST": {"type": "NUMBER", "index": 4, "name": "ITEM_COST", "comment": null}, "INVENTORY_VALUE": {"type": "NUMBER", "index": 5, "name": "INVENTORY_VALUE", "comment": null}, "INVENTORY_COST": {"type": "NUMBER", "index": 6, "name": "INVENTORY_COST", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.raw_inventory_value"}, "model.dbt_demo.fact_daily_expenses": {"metadata": {"type": "VIEW", "schema": "PUBLIC", "name": "COST_SUMMARY", "database": "DBT_DEMO", "comment": null, "owner": "ACCOUNTADMIN"}, "columns": 
{"DT": {"type": "NUMBER", "index": 1, "name": "DT", "comment": null}, "INVENTORY_COST": {"type": "NUMBER", "index": 2, "name": "INVENTORY_COST", "comment": null}}, "stats": {"has_stats": {"id": "has_stats", "label": "Has Stats?", "value": false, "include": false, "description": "Indicates whether there are statistics for this table"}}, "unique_id": "model.dbt_demo.fact_daily_expenses"}}, "sources": {}, "errors": null} diff --git a/databuilder/example/sample_data/dbt/manifest.json b/databuilder/example/sample_data/dbt/manifest.json new file mode 100644 index 0000000000..1504d7436c --- /dev/null +++ b/databuilder/example/sample_data/dbt/manifest.json @@ -0,0 +1 @@ +{"metadata": {"dbt_schema_version": "https://schemas.getdbt.com/dbt/manifest/v1.json", "dbt_version": "0.19.1", "generated_at": "2021-04-20T19:44:40.433304Z", "invocation_id": "2c6e4c85-3912-40cc-b613-0a50c65e32bc", "env": {}, "project_id": "f005e70093f1ddd3daa6316f5ec6f69e", "user_id": "8ea306a1-9d65-47b6-af2e-a29869f560dc", "send_anonymous_usage_stats": true, "adapter_type": "snowflake"}, "nodes": {"model.dbt_demo.fact_third_party_performance": {"raw_sql": "select\n third_party.cc_company as company_id,\n third_party.cc_company_name as company_name,\n sum(item_call_center_loss) as total_loss,\n sum(item_call_center_loss) / sum(item_call_center_cnt) as avg_item_loss\nfrom {{ ref('fact_catalog_returns') }} fact_catalog_returns\njoin \"SNOWFLAKE_SAMPLE_DATA\".\"TPCDS_SF100TCL\".\"CALL_CENTER\" as third_party\n on third_party.cc_call_center_sk = fact_catalog_returns.cr_call_center_sk\ngroup by 1,2\norder by 4 desc, 3 desc", "compiled": true, "resource_type": "model", "depends_on": {"macros": [], "nodes": ["model.dbt_demo.fact_catalog_returns"]}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", 
"schema": "PUBLIC", "fqn": ["dbt_demo", "call_center", "fact_third_party_performance"], "unique_id": "model.dbt_demo.fact_third_party_performance", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "call_center/fact_third_party_performance.sql", "original_file_path": "models/call_center/fact_third_party_performance.sql", "name": "fact_third_party_performance", "alias": "fact_third_party_performance", "checksum": {"name": "sha256", "checksum": "da5955ffcea53530dd0258ec7f786c2dee618c785966f3f1cae942ae257e4390"}, "tags": [], "refs": [["fact_catalog_returns"]], "sources": [], "description": "The performance for third party vendors loss rate by day.", "columns": {"company_id": {"name": "company_id", "description": "The ID for the company", "meta": {}, "data_type": null, "quote": null, "tags": ["column_tag", "goes_here"]}, "company_name": {"name": "company_name", "description": "", "meta": {}, "data_type": null, "quote": null, "tags": []}, "total_loss": {"name": "total_loss", "description": "The total loss for the given third party vendor", "meta": {}, "data_type": null, "quote": null, "tags": []}, "avg_item_loss": {"name": "avg_item_loss", "description": "The average loss per item for the given third party vendor", "meta": {}, "data_type": null, "quote": null, "tags": []}}, "meta": {}, "docs": {"show": true}, "patch_path": "models/call_center/schema.yml", "build_path": "target/compiled/dbt_demo/models/call_center/fact_third_party_performance.sql", "deferred": false, "unrendered_config": {}, "compiled_sql": "select\n third_party.cc_company as company_id,\n third_party.cc_company_name as company_name,\n sum(item_call_center_loss) as total_loss,\n sum(item_call_center_loss) / sum(item_call_center_cnt) as avg_item_loss\nfrom DBT_DEMO.PUBLIC.fact_catalog_returns fact_catalog_returns\njoin \"SNOWFLAKE_SAMPLE_DATA\".\"TPCDS_SF100TCL\".\"CALL_CENTER\" as third_party\n on third_party.cc_call_center_sk = 
fact_catalog_returns.cr_call_center_sk\ngroup by 1,2\norder by 4 desc, 3 desc", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.fact_third_party_performance"}, "model.dbt_demo.fact_catalog_returns": {"raw_sql": "select\n catalog_returns.cr_item_sk,\n catalog_returns.cr_call_center_sk,\n sum(catalog_returns.cr_net_loss) as item_call_center_loss,\n count(*) as item_call_center_cnt\nfrom (\n select\n catalog_returns.cr_item_sk,\n catalog_returns.cr_call_center_sk,\n catalog_returns.cr_net_loss\n from \"SNOWFLAKE_SAMPLE_DATA\".\"TPCDS_SF100TCL\".\"CATALOG_RETURNS\" as catalog_returns\n) catalog_returns\ngroup by 1,2\norder by 3 desc", "compiled": true, "resource_type": "model", "depends_on": {"macros": [], "nodes": []}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "call_center", "fact_catalog_returns"], "unique_id": "model.dbt_demo.fact_catalog_returns", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "call_center/fact_catalog_returns.sql", "original_file_path": "models/call_center/fact_catalog_returns.sql", "name": "fact_catalog_returns", "alias": "fact_catalog_returns", "checksum": {"name": "sha256", "checksum": "e302d505ddbb07649638ddf5794e2e94d0831cf2f7cfcecc74e75b4326e4fd3c"}, "tags": [], "refs": [], "sources": [], "description": "Aggregate item to call center returns within the retail catalog.", "columns": {"cr_item_sk": {"name": "cr_item_sk", "description": "The item sku", "meta": {}, "data_type": null, "quote": null, "tags": []}, "cr_call_center_sk": {"name": "cr_call_center_sk", "description": "The call center ID", "meta": {}, "data_type": null, "quote": null, "tags": []}, "item_call_center_loss": {"name": "item_call_center_loss", 
"description": "The total loss for a given item / call center relationship", "meta": {}, "data_type": null, "quote": null, "tags": []}, "item_call_center_cnt": {"name": "item_call_center_cnt", "description": "The number of times a given item was returned for a call center", "meta": {}, "data_type": null, "quote": null, "tags": []}}, "meta": {}, "docs": {"show": true}, "patch_path": "models/call_center/schema.yml", "build_path": "target/compiled/dbt_demo/models/call_center/fact_catalog_returns.sql", "deferred": false, "unrendered_config": {}, "compiled_sql": "select\n catalog_returns.cr_item_sk,\n catalog_returns.cr_call_center_sk,\n sum(catalog_returns.cr_net_loss) as item_call_center_loss,\n count(*) as item_call_center_cnt\nfrom (\n select\n catalog_returns.cr_item_sk,\n catalog_returns.cr_call_center_sk,\n catalog_returns.cr_net_loss\n from \"SNOWFLAKE_SAMPLE_DATA\".\"TPCDS_SF100TCL\".\"CATALOG_RETURNS\" as catalog_returns\n) catalog_returns\ngroup by 1,2\norder by 3 desc", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.fact_catalog_returns"}, "model.dbt_demo.raw_inventory_value": {"raw_sql": "select\n inv_date_sk as dt,\n inv_quantity_on_hand as quantity,\n item.i_current_price as item_price,\n item.i_wholesale_cost as item_cost,\n item.i_current_price * inv_quantity_on_hand as inventory_value,\n item.i_wholesale_cost * inv_quantity_on_hand as inventory_cost\nfrom \"DBT_DEMO\".\"PUBLIC\".\"INVENTORY\" as inv\njoin \"DBT_DEMO\".\"PUBLIC\".\"ITEM\" as item on item.i_item_sk = inv.inv_item_sk", "compiled": true, "resource_type": "model", "depends_on": {"macros": [], "nodes": []}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "inventory", "raw_inventory_value"], "unique_id": 
"model.dbt_demo.raw_inventory_value", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "inventory/raw_inventory_value.sql", "original_file_path": "models/inventory/raw_inventory_value.sql", "name": "raw_inventory_value", "alias": "raw_inventory_value", "checksum": {"name": "sha256", "checksum": "99344144b461e8fa8c1177c0651d8ec8941f7a6d573b0c2e288d715251e79804"}, "tags": [], "refs": [], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/inventory/raw_inventory_value.sql", "deferred": false, "unrendered_config": {}, "compiled_sql": "select\n inv_date_sk as dt,\n inv_quantity_on_hand as quantity,\n item.i_current_price as item_price,\n item.i_wholesale_cost as item_cost,\n item.i_current_price * inv_quantity_on_hand as inventory_value,\n item.i_wholesale_cost * inv_quantity_on_hand as inventory_cost\nfrom \"DBT_DEMO\".\"PUBLIC\".\"INVENTORY\" as inv\njoin \"DBT_DEMO\".\"PUBLIC\".\"ITEM\" as item on item.i_item_sk = inv.inv_item_sk", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.raw_inventory_value"}, "model.dbt_demo.fact_warehouse_inventory": {"raw_sql": "select\n inv_wh.dt as dt,\n inv_wh.w_warehouse_id as w_warehouse_id,\n sum(inv_val.inventory_value) as inventory_value,\n sum(inv_val.inventory_cost) as inventory_cost\nfrom {{ ref('raw_inventory_item_warehouse') }} inv_wh\njoin {{ ref('raw_inventory_value') }} inv_val on inv_val.dt = inv_wh.dt\ngroup by 1,2\norder by 1,2", "compiled": true, "resource_type": "model", "depends_on": {"macros": [], "nodes": ["model.dbt_demo.raw_inventory_item_warehouse", "model.dbt_demo.raw_inventory_value"]}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", 
"schema": "PUBLIC", "fqn": ["dbt_demo", "inventory", "fact_warehouse_inventory"], "unique_id": "model.dbt_demo.fact_warehouse_inventory", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "inventory/fact_warehouse_inventory.sql", "original_file_path": "models/inventory/fact_warehouse_inventory.sql", "name": "fact_warehouse_inventory", "alias": "fact_warehouse_inventory", "checksum": {"name": "sha256", "checksum": "f6fef32ceda3ed0719d51fd0a07ab1e53532a403703dd41d39aca32354b92153"}, "tags": [], "refs": [["raw_inventory_item_warehouse"], ["raw_inventory_value"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/inventory/fact_warehouse_inventory.sql", "deferred": false, "unrendered_config": {}, "compiled_sql": "select\n inv_wh.dt as dt,\n inv_wh.w_warehouse_id as w_warehouse_id,\n sum(inv_val.inventory_value) as inventory_value,\n sum(inv_val.inventory_cost) as inventory_cost\nfrom DBT_DEMO.PUBLIC.raw_inventory_item_warehouse inv_wh\njoin DBT_DEMO.PUBLIC.raw_inventory_value inv_val on inv_val.dt = inv_wh.dt\ngroup by 1,2\norder by 1,2", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.fact_warehouse_inventory"}, "model.dbt_demo.raw_inventory_item_warehouse": {"raw_sql": "select\n inv_date_sk as dt,\n wh.w_warehouse_id as w_warehouse_id,\n inv.inv_item_sk as item_id,\n inv.inv_quantity_on_hand as quantity,\n item.i_current_price as item_price,\n item.i_wholesale_cost as item_cost,\n item.i_current_price * inv_quantity_on_hand as inventory_value,\n item.i_wholesale_cost * inv_quantity_on_hand as inventory_cost\nfrom \"DBT_DEMO\".\"PUBLIC\".\"INVENTORY\" as inv\njoin \"DBT_DEMO\".\"PUBLIC\".\"ITEM\" as item on item.i_item_sk = inv.inv_item_sk\njoin \"DBT_DEMO\".\"PUBLIC\".\"WAREHOUSE\" as wh on wh.w_warehouse_sk = inv.inv_warehouse_sk\nlimit 100", "compiled": true, "resource_type": "model", 
"depends_on": {"macros": [], "nodes": []}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "inventory", "raw_inventory_item_warehouse"], "unique_id": "model.dbt_demo.raw_inventory_item_warehouse", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "inventory/raw_inventory_item_warehouse.sql", "original_file_path": "models/inventory/raw_inventory_item_warehouse.sql", "name": "raw_inventory_item_warehouse", "alias": "raw_inventory_item_warehouse", "checksum": {"name": "sha256", "checksum": "543cf0bb276bbc1b5c6be16c9694a3eb0ee9e83d32f365a80fefe7058c09ee6e"}, "tags": [], "refs": [], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/inventory/raw_inventory_item_warehouse.sql", "deferred": false, "unrendered_config": {}, "compiled_sql": "select\n inv_date_sk as dt,\n wh.w_warehouse_id as w_warehouse_id,\n inv.inv_item_sk as item_id,\n inv.inv_quantity_on_hand as quantity,\n item.i_current_price as item_price,\n item.i_wholesale_cost as item_cost,\n item.i_current_price * inv_quantity_on_hand as inventory_value,\n item.i_wholesale_cost * inv_quantity_on_hand as inventory_cost\nfrom \"DBT_DEMO\".\"PUBLIC\".\"INVENTORY\" as inv\njoin \"DBT_DEMO\".\"PUBLIC\".\"ITEM\" as item on item.i_item_sk = inv.inv_item_sk\njoin \"DBT_DEMO\".\"PUBLIC\".\"WAREHOUSE\" as wh on wh.w_warehouse_sk = inv.inv_warehouse_sk\nlimit 100", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.raw_inventory_item_warehouse"}, "model.dbt_demo.fact_daily_expenses": {"raw_sql": "{{ config(alias='cost_summary', schema='PUBLIC', tags=['finance', 'certified'], comment='this is a comment') 
}}\n\n\nselect\n inv_wh.dt as dt,\n inventory_cost\nfrom {{ ref('fact_warehouse_inventory') }} inv_wh", "compiled": true, "resource_type": "model", "depends_on": {"macros": [], "nodes": ["model.dbt_demo.fact_warehouse_inventory"]}, "config": {"enabled": true, "materialized": "view", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": "cost_summary", "schema": "PUBLIC", "database": null, "tags": ["finance", "certified"], "full_refresh": null, "comment": "this is a comment", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "finance", "fact_daily_expenses"], "unique_id": "model.dbt_demo.fact_daily_expenses", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "finance/fact_daily_expenses.sql", "original_file_path": "models/finance/fact_daily_expenses.sql", "name": "fact_daily_expenses", "alias": "cost_summary", "checksum": {"name": "sha256", "checksum": "b46ac277061465d252c4912cf8f8c098f2d3d5200b9c330eba9908f2a9d53b44"}, "tags": ["finance", "certified"], "refs": [["fact_warehouse_inventory"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/finance/fact_daily_expenses.sql", "deferred": false, "unrendered_config": {"alias": "cost_summary", "schema": "PUBLIC", "tags": ["finance", "certified"], "comment": "this is a comment"}, "compiled_sql": "\n\n\nselect\n inv_wh.dt as dt,\n inventory_cost\nfrom DBT_DEMO.PUBLIC.fact_warehouse_inventory inv_wh", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": "DBT_DEMO.PUBLIC.cost_summary"}, "test.dbt_demo.unique_fact_catalog_returns_cr_item_sk": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_unique(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "unique", "kwargs": {"column_name": "cr_item_sk", "model": "{{ ref('fact_catalog_returns') }}"}, "namespace": null}, "compiled": true, "resource_type": 
"test", "depends_on": {"macros": ["macro.dbt.test_unique"], "nodes": ["model.dbt_demo.fact_catalog_returns"]}, "config": {"enabled": true, "materialized": "test", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "unique_fact_catalog_returns_cr_item_sk"], "unique_id": "test.dbt_demo.unique_fact_catalog_returns_cr_item_sk", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/unique_fact_catalog_returns_cr_item_sk.sql", "original_file_path": "models/call_center/schema.yml", "name": "unique_fact_catalog_returns_cr_item_sk", "alias": "unique_fact_catalog_returns_cr_item_sk", "checksum": {"name": "none", "checksum": ""}, "tags": ["schema"], "refs": [["fact_catalog_returns"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/unique_fact_catalog_returns_cr_item_sk.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom (\n\n select\n cr_item_sk\n\n from DBT_DEMO.PUBLIC.fact_catalog_returns\n where cr_item_sk is not null\n group by cr_item_sk\n having count(*) > 1\n\n) validation_errors\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "cr_item_sk"}, "test.dbt_demo.not_null_fact_catalog_returns_cr_item_sk": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_not_null(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "not_null", "kwargs": {"column_name": "cr_item_sk", "model": "{{ ref('fact_catalog_returns') }}"}, "namespace": null}, "compiled": true, "resource_type": "test", "depends_on": {"macros": 
["macro.dbt.test_not_null"], "nodes": ["model.dbt_demo.fact_catalog_returns"]}, "config": {"enabled": true, "materialized": "test", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "not_null_fact_catalog_returns_cr_item_sk"], "unique_id": "test.dbt_demo.not_null_fact_catalog_returns_cr_item_sk", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/not_null_fact_catalog_returns_cr_item_sk.sql", "original_file_path": "models/call_center/schema.yml", "name": "not_null_fact_catalog_returns_cr_item_sk", "alias": "not_null_fact_catalog_returns_cr_item_sk", "checksum": {"name": "none", "checksum": ""}, "tags": ["schema"], "refs": [["fact_catalog_returns"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/not_null_fact_catalog_returns_cr_item_sk.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom DBT_DEMO.PUBLIC.fact_catalog_returns\nwhere cr_item_sk is null\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "cr_item_sk"}, "test.dbt_demo.unique_fact_third_party_performance_company_id": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_unique(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "unique", "kwargs": {"column_name": "company_id", "model": "{{ ref('fact_third_party_performance') }}"}, "namespace": null}, "compiled": true, "resource_type": "test", "depends_on": {"macros": ["macro.dbt.test_unique"], "nodes": ["model.dbt_demo.fact_third_party_performance"]}, "config": {"enabled": true, "materialized": "test", 
"persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "unique_fact_third_party_performance_company_id"], "unique_id": "test.dbt_demo.unique_fact_third_party_performance_company_id", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/unique_fact_third_party_performance_company_id.sql", "original_file_path": "models/call_center/schema.yml", "name": "unique_fact_third_party_performance_company_id", "alias": "unique_fact_third_party_performance_company_id", "checksum": {"name": "none", "checksum": ""}, "tags": ["column_tag", "goes_here", "schema"], "refs": [["fact_third_party_performance"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/unique_fact_third_party_performance_company_id.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom (\n\n select\n company_id\n\n from DBT_DEMO.PUBLIC.fact_third_party_performance\n where company_id is not null\n group by company_id\n having count(*) > 1\n\n) validation_errors\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "company_id"}, "test.dbt_demo.not_null_fact_third_party_performance_company_id": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_not_null(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "not_null", "kwargs": {"column_name": "company_id", "model": "{{ ref('fact_third_party_performance') }}"}, "namespace": null}, "compiled": true, "resource_type": "test", "depends_on": {"macros": ["macro.dbt.test_not_null"], "nodes": 
["model.dbt_demo.fact_third_party_performance"]}, "config": {"enabled": true, "materialized": "test", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "not_null_fact_third_party_performance_company_id"], "unique_id": "test.dbt_demo.not_null_fact_third_party_performance_company_id", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/not_null_fact_third_party_performance_company_id.sql", "original_file_path": "models/call_center/schema.yml", "name": "not_null_fact_third_party_performance_company_id", "alias": "not_null_fact_third_party_performance_company_id", "checksum": {"name": "none", "checksum": ""}, "tags": ["column_tag", "goes_here", "schema"], "refs": [["fact_third_party_performance"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/not_null_fact_third_party_performance_company_id.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom DBT_DEMO.PUBLIC.fact_third_party_performance\nwhere company_id is null\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "company_id"}, "test.dbt_demo.unique_fact_third_party_performance_company_name": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_unique(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "unique", "kwargs": {"column_name": "company_name", "model": "{{ ref('fact_third_party_performance') }}"}, "namespace": null}, "compiled": true, "resource_type": "test", "depends_on": {"macros": ["macro.dbt.test_unique"], "nodes": 
["model.dbt_demo.fact_third_party_performance"]}, "config": {"enabled": true, "materialized": "test", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "unique_fact_third_party_performance_company_name"], "unique_id": "test.dbt_demo.unique_fact_third_party_performance_company_name", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/unique_fact_third_party_performance_company_name.sql", "original_file_path": "models/call_center/schema.yml", "name": "unique_fact_third_party_performance_company_name", "alias": "unique_fact_third_party_performance_company_name", "checksum": {"name": "none", "checksum": ""}, "tags": ["schema"], "refs": [["fact_third_party_performance"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/unique_fact_third_party_performance_company_name.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom (\n\n select\n company_name\n\n from DBT_DEMO.PUBLIC.fact_third_party_performance\n where company_name is not null\n group by company_name\n having count(*) > 1\n\n) validation_errors\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "company_name"}, "test.dbt_demo.not_null_fact_third_party_performance_company_name": {"raw_sql": "{{ config(severity='ERROR') }}{{ test_not_null(**_dbt_schema_test_kwargs) }}", "test_metadata": {"name": "not_null", "kwargs": {"column_name": "company_name", "model": "{{ ref('fact_third_party_performance') }}"}, "namespace": null}, "compiled": true, "resource_type": "test", 
"depends_on": {"macros": ["macro.dbt.test_not_null"], "nodes": ["model.dbt_demo.fact_third_party_performance"]}, "config": {"enabled": true, "materialized": "test", "persist_docs": {}, "vars": {}, "quoting": {}, "column_types": {}, "alias": null, "schema": null, "database": null, "tags": [], "full_refresh": null, "severity": "ERROR", "post-hook": [], "pre-hook": []}, "database": "DBT_DEMO", "schema": "PUBLIC", "fqn": ["dbt_demo", "schema_test", "not_null_fact_third_party_performance_company_name"], "unique_id": "test.dbt_demo.not_null_fact_third_party_performance_company_name", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "schema_test/not_null_fact_third_party_performance_company_name.sql", "original_file_path": "models/call_center/schema.yml", "name": "not_null_fact_third_party_performance_company_name", "alias": "not_null_fact_third_party_performance_company_name", "checksum": {"name": "none", "checksum": ""}, "tags": ["schema"], "refs": [["fact_third_party_performance"]], "sources": [], "description": "", "columns": {}, "meta": {}, "docs": {"show": true}, "patch_path": null, "build_path": "target/compiled/dbt_demo/models/call_center/schema.yml/schema_test/not_null_fact_third_party_performance_company_name.sql", "deferred": false, "unrendered_config": {"severity": "ERROR"}, "compiled_sql": "\n \n \n\n\n\nselect count(*) as validation_errors\nfrom DBT_DEMO.PUBLIC.fact_third_party_performance\nwhere company_name is null\n\n\n", "extra_ctes_injected": true, "extra_ctes": [], "relation_name": null, "column_name": "company_name"}}, "sources": {}, "macros": {"macro.dbt_snowflake.snowflake__get_catalog": {"unique_id": "macro.dbt_snowflake.snowflake__get_catalog", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/catalog.sql", "original_file_path": "macros/catalog.sql", "name": "snowflake__get_catalog", "macro_sql": "{% macro 
snowflake__get_catalog(information_schema, schemas) -%}\n {% set query %}\n with tables as (\n\n select\n table_catalog as \"table_database\",\n table_schema as \"table_schema\",\n table_name as \"table_name\",\n table_type as \"table_type\",\n comment as \"table_comment\",\n\n -- note: this is the _role_ that owns the table\n table_owner as \"table_owner\",\n\n 'Clustering Key' as \"stats:clustering_key:label\",\n clustering_key as \"stats:clustering_key:value\",\n 'The key used to cluster this table' as \"stats:clustering_key:description\",\n (clustering_key is not null) as \"stats:clustering_key:include\",\n\n 'Row Count' as \"stats:row_count:label\",\n row_count as \"stats:row_count:value\",\n 'An approximate count of rows in this table' as \"stats:row_count:description\",\n (row_count is not null) as \"stats:row_count:include\",\n\n 'Approximate Size' as \"stats:bytes:label\",\n bytes as \"stats:bytes:value\",\n 'Approximate size of the table as reported by Snowflake' as \"stats:bytes:description\",\n (bytes is not null) as \"stats:bytes:include\",\n\n 'Last Modified' as \"stats:last_modified:label\",\n to_varchar(convert_timezone('UTC', last_altered), 'yyyy-mm-dd HH24:MI'||'UTC') as \"stats:last_modified:value\",\n 'The timestamp for last update/change' as \"stats:last_modified:description\",\n (last_altered is not null and table_type='BASE TABLE') as \"stats:last_modified:include\"\n\n from {{ information_schema }}.tables\n\n ),\n\n columns as (\n\n select\n table_catalog as \"table_database\",\n table_schema as \"table_schema\",\n table_name as \"table_name\",\n\n column_name as \"column_name\",\n ordinal_position as \"column_index\",\n data_type as \"column_type\",\n comment as \"column_comment\"\n\n from {{ information_schema }}.columns\n )\n\n select *\n from tables\n join columns using (\"table_database\", \"table_schema\", \"table_name\")\n where (\n {%- for schema in schemas -%}\n upper(\"table_schema\") = upper('{{ schema }}'){%- if not loop.last %} 
or {% endif -%}\n {%- endfor -%}\n )\n order by \"column_index\"\n {%- endset -%}\n\n {{ return(run_query(query)) }}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__create_table_as": {"unique_id": "macro.dbt_snowflake.snowflake__create_table_as", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__create_table_as", "macro_sql": "{% macro snowflake__create_table_as(temporary, relation, sql) -%}\n {%- set transient = config.get('transient', default=true) -%}\n {%- set cluster_by_keys = config.get('cluster_by', default=none) -%}\n {%- set enable_automatic_clustering = config.get('automatic_clustering', default=false) -%}\n {%- set copy_grants = config.get('copy_grants', default=false) -%}\n\n {%- if cluster_by_keys is not none and cluster_by_keys is string -%}\n {%- set cluster_by_keys = [cluster_by_keys] -%}\n {%- endif -%}\n {%- if cluster_by_keys is not none -%}\n {%- set cluster_by_string = cluster_by_keys|join(\", \")-%}\n {% else %}\n {%- set cluster_by_string = none -%}\n {%- endif -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create or replace {% if temporary -%}\n temporary\n {%- elif transient -%}\n transient\n {%- endif %} table {{ relation }} {% if copy_grants and not temporary -%} copy grants {%- endif %} as\n (\n {%- if cluster_by_string is not none -%}\n select * from(\n {{ sql }}\n ) order by ({{ cluster_by_string }})\n {%- else -%}\n {{ sql }}\n {%- endif %}\n );\n {% if cluster_by_string is not none and not temporary -%}\n alter table {{relation}} cluster by ({{cluster_by_string}});\n {%- endif -%}\n {% if enable_automatic_clustering and 
cluster_by_string is not none and not temporary -%}\n alter table {{relation}} resume recluster;\n {%- endif -%}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__create_view_as": {"unique_id": "macro.dbt_snowflake.snowflake__create_view_as", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__create_view_as", "macro_sql": "{% macro snowflake__create_view_as(relation, sql) -%}\n {%- set secure = config.get('secure', default=false) -%}\n {%- set copy_grants = config.get('copy_grants', default=false) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n create or replace {% if secure -%}\n secure\n {%- endif %} view {{ relation }} {% if copy_grants -%} copy grants {%- endif %} as (\n {{ sql }}\n );\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__get_columns_in_relation": {"unique_id": "macro.dbt_snowflake.snowflake__get_columns_in_relation", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__get_columns_in_relation", "macro_sql": "{% macro snowflake__get_columns_in_relation(relation) -%}\n {%- set sql -%}\n describe table {{ relation }}\n {%- endset -%}\n {%- set result = run_query(sql) -%}\n\n {% set maximum = 10000 %}\n {% if (result | length) >= maximum %}\n {% set msg %}\n Too many columns in relation {{ relation }}! 
dbt can only get\n information about relations with fewer than {{ maximum }} columns.\n {% endset %}\n {% do exceptions.raise_compiler_error(msg) %}\n {% endif %}\n\n {% set columns = [] %}\n {% for row in result %}\n {% do columns.append(api.Column.from_description(row['name'], row['type'])) %}\n {% endfor %}\n {% do return(columns) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__list_schemas": {"unique_id": "macro.dbt_snowflake.snowflake__list_schemas", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__list_schemas", "macro_sql": "{% macro snowflake__list_schemas(database) -%}\n {# 10k limit from here: https://docs.snowflake.net/manuals/sql-reference/sql/show-schemas.html#usage-notes #}\n {% set maximum = 10000 %}\n {% set sql -%}\n show terse schemas in database {{ database }}\n limit {{ maximum }}\n {%- endset %}\n {% set result = run_query(sql) %}\n {% if (result | length) >= maximum %}\n {% set msg %}\n Too many schemas in database {{ database }}! 
dbt can only get\n information about databases with fewer than {{ maximum }} schemas.\n {% endset %}\n {% do exceptions.raise_compiler_error(msg) %}\n {% endif %}\n {{ return(result) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__list_relations_without_caching": {"unique_id": "macro.dbt_snowflake.snowflake__list_relations_without_caching", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__list_relations_without_caching", "macro_sql": "{% macro snowflake__list_relations_without_caching(schema_relation) %}\n {%- set sql -%}\n show terse objects in {{ schema_relation }}\n {%- endset -%}\n\n {%- set result = run_query(sql) -%}\n {% set maximum = 10000 %}\n {% if (result | length) >= maximum %}\n {% set msg %}\n Too many schemas in schema {{ schema_relation }}! 
dbt can only get\n information about schemas with fewer than {{ maximum }} objects.\n {% endset %}\n {% do exceptions.raise_compiler_error(msg) %}\n {% endif %}\n {%- do return(result) -%}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__check_schema_exists": {"unique_id": "macro.dbt_snowflake.snowflake__check_schema_exists", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__check_schema_exists", "macro_sql": "{% macro snowflake__check_schema_exists(information_schema, schema) -%}\n {% call statement('check_schema_exists', fetch_result=True) -%}\n select count(*)\n from {{ information_schema }}.schemata\n where upper(schema_name) = upper('{{ schema }}')\n and upper(catalog_name) = upper('{{ information_schema.database }}')\n {%- endcall %}\n {{ return(load_result('check_schema_exists').table) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__current_timestamp": {"unique_id": "macro.dbt_snowflake.snowflake__current_timestamp", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__current_timestamp", "macro_sql": "{% macro snowflake__current_timestamp() -%}\n convert_timezone('UTC', current_timestamp())\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, 
"macro.dbt_snowflake.snowflake__snapshot_string_as_time": {"unique_id": "macro.dbt_snowflake.snowflake__snapshot_string_as_time", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__snapshot_string_as_time", "macro_sql": "{% macro snowflake__snapshot_string_as_time(timestamp) -%}\n {%- set result = \"to_timestamp_ntz('\" ~ timestamp ~ \"')\" -%}\n {{ return(result) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__snapshot_get_time": {"unique_id": "macro.dbt_snowflake.snowflake__snapshot_get_time", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__snapshot_get_time", "macro_sql": "{% macro snowflake__snapshot_get_time() -%}\n to_timestamp_ntz({{ current_timestamp() }})\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__rename_relation": {"unique_id": "macro.dbt_snowflake.snowflake__rename_relation", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__rename_relation", "macro_sql": "{% macro snowflake__rename_relation(from_relation, to_relation) -%}\n {% call statement('rename_relation') -%}\n alter table {{ from_relation }} rename to {{ to_relation }}\n {%- endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": 
{"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__alter_column_type": {"unique_id": "macro.dbt_snowflake.snowflake__alter_column_type", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__alter_column_type", "macro_sql": "{% macro snowflake__alter_column_type(relation, column_name, new_column_type) -%}\n {% call statement('alter_column_type') %}\n alter table {{ relation }} alter {{ adapter.quote(column_name) }} set data type {{ new_column_type }};\n {% endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__alter_relation_comment": {"unique_id": "macro.dbt_snowflake.snowflake__alter_relation_comment", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "snowflake__alter_relation_comment", "macro_sql": "{% macro snowflake__alter_relation_comment(relation, relation_comment) -%}\n comment on {{ relation.type }} {{ relation }} IS $${{ relation_comment | replace('$', '[$]') }}$$;\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__alter_column_comment": {"unique_id": "macro.dbt_snowflake.snowflake__alter_column_comment", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": 
"snowflake__alter_column_comment", "macro_sql": "{% macro snowflake__alter_column_comment(relation, column_dict) -%}\n alter {{ relation.type }} {{ relation }} alter\n {% for column_name in column_dict %}\n {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} COMMENT $${{ column_dict[column_name]['description'] | replace('$', '[$]') }}$$ {{ ',' if not loop.last else ';' }}\n {% endfor %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.get_current_query_tag": {"unique_id": "macro.dbt_snowflake.get_current_query_tag", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "get_current_query_tag", "macro_sql": "{% macro get_current_query_tag() -%}\n {{ return(run_query(\"show parameters like 'query_tag' in session\").rows[0]['value']) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.set_query_tag": {"unique_id": "macro.dbt_snowflake.set_query_tag", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "set_query_tag", "macro_sql": "{% macro set_query_tag() -%}\n {% set new_query_tag = config.get('query_tag') %}\n {% if new_query_tag %}\n {% set original_query_tag = get_current_query_tag() %}\n {{ log(\"Setting query_tag to '\" ~ new_query_tag ~ \"'. 
Will reset to '\" ~ original_query_tag ~ \"' after materialization.\") }}\n {% do run_query(\"alter session set query_tag = '{}'\".format(new_query_tag)) %}\n {{ return(original_query_tag)}}\n {% endif %}\n {{ return(none)}}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.unset_query_tag": {"unique_id": "macro.dbt_snowflake.unset_query_tag", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/adapters.sql", "original_file_path": "macros/adapters.sql", "name": "unset_query_tag", "macro_sql": "{% macro unset_query_tag(original_query_tag) -%}\n {% set new_query_tag = config.get('query_tag') %}\n {% if new_query_tag %}\n {% if original_query_tag %}\n {{ log(\"Resetting query_tag to '\" ~ original_query_tag ~ \"'.\") }}\n {% do run_query(\"alter session set query_tag = '{}'\".format(original_query_tag)) %}\n {% else %}\n {{ log(\"No original query_tag, unsetting parameter.\") }}\n {% do run_query(\"alter session unset query_tag\") %}\n {% endif %}\n {% endif %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.snowflake__get_merge_sql": {"unique_id": "macro.dbt_snowflake.snowflake__get_merge_sql", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/materializations/merge.sql", "original_file_path": "macros/materializations/merge.sql", "name": "snowflake__get_merge_sql", "macro_sql": "{% macro snowflake__get_merge_sql(target, source_sql, unique_key, dest_columns, predicates) -%}\n\n {#\n Workaround for Snowflake not being happy with a merge on a constant-false predicate.\n When no unique_key is 
provided, this macro will do a regular insert. If a unique_key\n is provided, then this macro will do a proper merge instead.\n #}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute='name')) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {%- if unique_key is none -%}\n\n {{ sql_header if sql_header is not none }}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ source_sql }}\n );\n\n {%- else -%}\n\n {{ default__get_merge_sql(target, source_sql, unique_key, dest_columns, predicates) }}\n\n {%- endif -%}\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.materialization_view_snowflake": {"unique_id": "macro.dbt_snowflake.materialization_view_snowflake", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/materializations/view.sql", "original_file_path": "macros/materializations/view.sql", "name": "materialization_view_snowflake", "macro_sql": "{% materialization view, adapter='snowflake' -%}\n\n {% set original_query_tag = set_query_tag() %}\n {% set to_return = create_or_replace_view() %}\n\n {% set target_relation = this.incorporate(type='view') %}\n {% do persist_docs(target_relation, model, for_columns=false) %}\n\n {% do return(to_return) %}\n\n {% do unset_query_tag(original_query_tag) %}\n\n{%- endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.materialization_table_snowflake": {"unique_id": "macro.dbt_snowflake.materialization_table_snowflake", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": 
"macros/materializations/table.sql", "original_file_path": "macros/materializations/table.sql", "name": "materialization_table_snowflake", "macro_sql": "{% materialization table, adapter='snowflake' %}\n\n {% set original_query_tag = set_query_tag() %}\n\n {%- set identifier = model['alias'] -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n {%- set target_relation = api.Relation.create(identifier=identifier,\n schema=schema,\n database=database, type='table') -%}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {#-- Drop the relation if it was a view to \"convert\" it in a table. This may lead to\n -- downtime, but it should be a relatively infrequent occurrence #}\n {% if old_relation is not none and not old_relation.is_table %}\n {{ log(\"Dropping relation \" ~ old_relation ~ \" because it is of type \" ~ old_relation.type) }}\n {{ drop_relation_if_exists(old_relation) }}\n {% endif %}\n\n --build model\n {% call statement('main') -%}\n {{ create_table_as(false, target_relation, sql) }}\n {%- endcall %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {% do persist_docs(target_relation, model) %}\n\n {% do unset_query_tag(original_query_tag) %}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.dbt_snowflake_validate_get_incremental_strategy": {"unique_id": "macro.dbt_snowflake.dbt_snowflake_validate_get_incremental_strategy", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": 
"macros/materializations/incremental.sql", "original_file_path": "macros/materializations/incremental.sql", "name": "dbt_snowflake_validate_get_incremental_strategy", "macro_sql": "{% macro dbt_snowflake_validate_get_incremental_strategy(config) %}\n {#-- Find and validate the incremental strategy #}\n {%- set strategy = config.get(\"incremental_strategy\", default=\"merge\") -%}\n\n {% set invalid_strategy_msg -%}\n Invalid incremental strategy provided: {{ strategy }}\n Expected one of: 'merge', 'delete+insert'\n {%- endset %}\n {% if strategy not in ['merge', 'delete+insert'] %}\n {% do exceptions.raise_compiler_error(invalid_strategy_msg) %}\n {% endif %}\n\n {% do return(strategy) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt_snowflake.dbt_snowflake_get_incremental_sql": {"unique_id": "macro.dbt_snowflake.dbt_snowflake_get_incremental_sql", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/materializations/incremental.sql", "original_file_path": "macros/materializations/incremental.sql", "name": "dbt_snowflake_get_incremental_sql", "macro_sql": "{% macro dbt_snowflake_get_incremental_sql(strategy, tmp_relation, target_relation, unique_key, dest_columns) %}\n {% if strategy == 'merge' %}\n {% do return(get_merge_sql(target_relation, tmp_relation, unique_key, dest_columns)) %}\n {% elif strategy == 'delete+insert' %}\n {% do return(get_delete_insert_merge_sql(target_relation, tmp_relation, unique_key, dest_columns)) %}\n {% else %}\n {% do exceptions.raise_compiler_error('invalid strategy: ' ~ strategy) %}\n {% endif %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, 
"macro.dbt_snowflake.materialization_incremental_snowflake": {"unique_id": "macro.dbt_snowflake.materialization_incremental_snowflake", "package_name": "dbt_snowflake", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/snowflake", "path": "macros/materializations/incremental.sql", "original_file_path": "macros/materializations/incremental.sql", "name": "materialization_incremental_snowflake", "macro_sql": "{% materialization incremental, adapter='snowflake' -%}\n\n {% set original_query_tag = set_query_tag() %}\n\n {%- set unique_key = config.get('unique_key') -%}\n {%- set full_refresh_mode = (should_full_refresh()) -%}\n\n {% set target_relation = this %}\n {% set existing_relation = load_relation(this) %}\n {% set tmp_relation = make_temp_relation(this) %}\n\n {#-- Validate early so we don't run SQL if the strategy is invalid --#}\n {% set strategy = dbt_snowflake_validate_get_incremental_strategy(config) -%}\n\n -- setup\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% if existing_relation is none %}\n {% set build_sql = create_table_as(False, target_relation, sql) %}\n {% elif existing_relation.is_view %}\n {#-- Can't overwrite a view with a table - we must drop --#}\n {{ log(\"Dropping relation \" ~ target_relation ~ \" because it is a view and this model is a table.\") }}\n {% do adapter.drop_relation(existing_relation) %}\n {% set build_sql = create_table_as(False, target_relation, sql) %}\n {% elif full_refresh_mode %}\n {% set build_sql = create_table_as(False, target_relation, sql) %}\n {% else %}\n {% do run_query(create_table_as(True, tmp_relation, sql)) %}\n {% do adapter.expand_target_column_types(\n from_relation=tmp_relation,\n to_relation=target_relation) %}\n {% set dest_columns = adapter.get_columns_in_relation(target_relation) %}\n {% set build_sql = dbt_snowflake_get_incremental_sql(strategy, tmp_relation, 
target_relation, unique_key, dest_columns) %}\n {% endif %}\n\n {%- call statement('main') -%}\n {{ build_sql }}\n {%- endcall -%}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {% set target_relation = target_relation.incorporate(type='table') %}\n {% do persist_docs(target_relation, model) %}\n\n {% do unset_query_tag(original_query_tag) %}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.statement": {"unique_id": "macro.dbt.statement", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/core.sql", "original_file_path": "macros/core.sql", "name": "statement", "macro_sql": "{% macro statement(name=None, fetch_result=False, auto_begin=True) -%}\n {%- if execute: -%}\n {%- set sql = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime SQL for node \"{}\"'.format(model['unique_id'])) }}\n {{ write(sql) }}\n {%- endif -%}\n\n {%- set res, table = adapter.execute(sql, auto_begin=auto_begin, fetch=fetch_result) -%}\n {%- if name is not none -%}\n {{ store_result(name, response=res, agate_table=table) }}\n {%- endif -%}\n\n {%- endif -%}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.noop_statement": {"unique_id": "macro.dbt.noop_statement", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/core.sql", "original_file_path": "macros/core.sql", "name": "noop_statement", "macro_sql": "{% macro noop_statement(name=None, 
message=None, code=None, rows_affected=None, res=None) -%}\n {%- set sql = caller() -%}\n\n {%- if name == 'main' -%}\n {{ log('Writing runtime SQL for node \"{}\"'.format(model['unique_id'])) }}\n {{ write(sql) }}\n {%- endif -%}\n\n {%- if name is not none -%}\n {{ store_raw_result(name, message=message, code=code, rows_affected=rows_affected, agate_table=res) }}\n {%- endif -%}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.run_hooks": {"unique_id": "macro.dbt.run_hooks", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "run_hooks", "macro_sql": "{% macro run_hooks(hooks, inside_transaction=True) %}\n {% for hook in hooks | selectattr('transaction', 'equalto', inside_transaction) %}\n {% if not inside_transaction and loop.first %}\n {% call statement(auto_begin=inside_transaction) %}\n commit;\n {% endcall %}\n {% endif %}\n {% set rendered = render(hook.get('sql')) | trim %}\n {% if (rendered | length) > 0 %}\n {% call statement(auto_begin=inside_transaction) %}\n {{ rendered }}\n {% endcall %}\n {% endif %}\n {% endfor %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.column_list": {"unique_id": "macro.dbt.column_list", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "column_list", "macro_sql": "{% macro column_list(columns) %}\n {%- for col in columns %}\n {{ col.name }} {% if not loop.last %},{% endif 
%}\n {% endfor -%}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.column_list_for_create_table": {"unique_id": "macro.dbt.column_list_for_create_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "column_list_for_create_table", "macro_sql": "{% macro column_list_for_create_table(columns) %}\n {%- for col in columns %}\n {{ col.name }} {{ col.data_type }} {%- if not loop.last %},{% endif %}\n {% endfor -%}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.make_hook_config": {"unique_id": "macro.dbt.make_hook_config", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "make_hook_config", "macro_sql": "{% macro make_hook_config(sql, inside_transaction) %}\n {{ tojson({\"sql\": sql, \"transaction\": inside_transaction}) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.before_begin": {"unique_id": "macro.dbt.before_begin", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "before_begin", "macro_sql": "{% macro before_begin(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% 
endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.in_transaction": {"unique_id": "macro.dbt.in_transaction", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "in_transaction", "macro_sql": "{% macro in_transaction(sql) %}\n {{ make_hook_config(sql, inside_transaction=True) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.after_commit": {"unique_id": "macro.dbt.after_commit", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "after_commit", "macro_sql": "{% macro after_commit(sql) %}\n {{ make_hook_config(sql, inside_transaction=False) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.drop_relation_if_exists": {"unique_id": "macro.dbt.drop_relation_if_exists", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "drop_relation_if_exists", "macro_sql": "{% macro drop_relation_if_exists(relation) %}\n {% if relation is not none %}\n {{ adapter.drop_relation(relation) }}\n {% endif %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, 
"docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.load_relation": {"unique_id": "macro.dbt.load_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "load_relation", "macro_sql": "{% macro load_relation(relation) %}\n {% do return(adapter.get_relation(\n database=relation.database,\n schema=relation.schema,\n identifier=relation.identifier\n )) -%}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.should_full_refresh": {"unique_id": "macro.dbt.should_full_refresh", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/helpers.sql", "original_file_path": "macros/materializations/helpers.sql", "name": "should_full_refresh", "macro_sql": "{% macro should_full_refresh() %}\n {% set config_full_refresh = config.get('full_refresh') %}\n {% if config_full_refresh is none %}\n {% set config_full_refresh = flags.FULL_REFRESH %}\n {% endif %}\n {% do return(config_full_refresh) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_merge_sql": {"unique_id": "macro.dbt.snapshot_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot_merge.sql", "original_file_path": "macros/materializations/snapshot/snapshot_merge.sql", "name": "snapshot_merge_sql", "macro_sql": "{% macro snapshot_merge_sql(target, source, insert_cols) -%}\n {{ 
adapter.dispatch('snapshot_merge_sql')(target, source, insert_cols) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__snapshot_merge_sql": {"unique_id": "macro.dbt.default__snapshot_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot_merge.sql", "original_file_path": "macros/materializations/snapshot/snapshot_merge.sql", "name": "default__snapshot_merge_sql", "macro_sql": "{% macro default__snapshot_merge_sql(target, source, insert_cols) -%}\n {%- set insert_cols_csv = insert_cols | join(', ') -%}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id\n\n when matched\n and DBT_INTERNAL_DEST.dbt_valid_to is null\n and DBT_INTERNAL_SOURCE.dbt_change_type in ('update', 'delete')\n then update\n set dbt_valid_to = DBT_INTERNAL_SOURCE.dbt_valid_to\n\n when not matched\n and DBT_INTERNAL_SOURCE.dbt_change_type = 'insert'\n then insert ({{ insert_cols_csv }})\n values ({{ insert_cols_csv }})\n ;\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.strategy_dispatch": {"unique_id": "macro.dbt.strategy_dispatch", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "strategy_dispatch", "macro_sql": "{% macro strategy_dispatch(name) -%}\n{% set original_name = name %}\n {% if '.' 
in name %}\n {% set package_name, name = name.split(\".\", 1) %}\n {% else %}\n {% set package_name = none %}\n {% endif %}\n\n {% if package_name is none %}\n {% set package_context = context %}\n {% elif package_name in context %}\n {% set package_context = context[package_name] %}\n {% else %}\n {% set error_msg %}\n Could not find package '{{package_name}}', called with '{{original_name}}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n\n {%- set search_name = 'snapshot_' ~ name ~ '_strategy' -%}\n\n {% if search_name not in package_context %}\n {% set error_msg %}\n The specified strategy macro '{{name}}' was not found in package '{{ package_name }}'\n {% endset %}\n {{ exceptions.raise_compiler_error(error_msg | trim) }}\n {% endif %}\n {{ return(package_context[search_name]) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_hash_arguments": {"unique_id": "macro.dbt.snapshot_hash_arguments", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_hash_arguments", "macro_sql": "{% macro snapshot_hash_arguments(args) -%}\n {{ adapter.dispatch('snapshot_hash_arguments')(args) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__snapshot_hash_arguments": {"unique_id": "macro.dbt.default__snapshot_hash_arguments", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": 
"macros/materializations/snapshot/strategies.sql", "name": "default__snapshot_hash_arguments", "macro_sql": "{% macro default__snapshot_hash_arguments(args) -%}\n md5({%- for arg in args -%}\n coalesce(cast({{ arg }} as varchar ), '')\n {% if not loop.last %} || '|' || {% endif %}\n {%- endfor -%})\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_get_time": {"unique_id": "macro.dbt.snapshot_get_time", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_get_time", "macro_sql": "{% macro snapshot_get_time() -%}\n {{ adapter.dispatch('snapshot_get_time')() }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__snapshot_get_time": {"unique_id": "macro.dbt.default__snapshot_get_time", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "default__snapshot_get_time", "macro_sql": "{% macro default__snapshot_get_time() -%}\n {{ current_timestamp() }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_timestamp_strategy": {"unique_id": "macro.dbt.snapshot_timestamp_strategy", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_timestamp_strategy", "macro_sql": "{% macro snapshot_timestamp_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set primary_key = config['unique_key'] %}\n {% set updated_at = config['updated_at'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n\n {#/*\n The snapshot relation might not have an {{ updated_at }} value if the\n snapshot strategy is changed from `check` to `timestamp`. We\n should use a dbt-created column for the comparison in the snapshot\n table instead of assuming that the user-supplied {{ updated_at }}\n will be present in the historical data.\n\n See https://github.com/fishtown-analytics/dbt/issues/2350\n */ #}\n {% set row_changed_expr -%}\n ({{ snapshotted_rel }}.dbt_valid_from < {{ current_rel }}.{{ updated_at }})\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_string_as_time": {"unique_id": "macro.dbt.snapshot_string_as_time", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_string_as_time", "macro_sql": "{% macro snapshot_string_as_time(timestamp) -%}\n {{ adapter.dispatch('snapshot_string_as_time')(timestamp) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], 
"depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__snapshot_string_as_time": {"unique_id": "macro.dbt.default__snapshot_string_as_time", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "default__snapshot_string_as_time", "macro_sql": "{% macro default__snapshot_string_as_time(timestamp) %}\n {% do exceptions.raise_not_implemented(\n 'snapshot_string_as_time macro not implemented for adapter '+adapter.type()\n ) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_check_all_get_existing_columns": {"unique_id": "macro.dbt.snapshot_check_all_get_existing_columns", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_check_all_get_existing_columns", "macro_sql": "{% macro snapshot_check_all_get_existing_columns(node, target_exists) -%}\n {%- set query_columns = get_columns_in_query(node['compiled_sql']) -%}\n {%- if not target_exists -%}\n {# no table yet -> return whatever the query does #}\n {{ return([false, query_columns]) }}\n {%- endif -%}\n {# handle any schema changes #}\n {%- set target_table = node.get('alias', node.get('name')) -%}\n {%- set target_relation = adapter.get_relation(database=node.database, schema=node.schema, identifier=target_table) -%}\n {%- set existing_cols = get_columns_in_query('select * from ' ~ target_relation) -%}\n {%- set ns = namespace() -%} {# handle for-loop scoping 
with a namespace #}\n {%- set ns.column_added = false -%}\n\n {%- set intersection = [] -%}\n {%- for col in query_columns -%}\n {%- if col in existing_cols -%}\n {%- do intersection.append(col) -%}\n {%- else -%}\n {% set ns.column_added = true %}\n {%- endif -%}\n {%- endfor -%}\n {{ return([ns.column_added, intersection]) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_check_strategy": {"unique_id": "macro.dbt.snapshot_check_strategy", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/strategies.sql", "original_file_path": "macros/materializations/snapshot/strategies.sql", "name": "snapshot_check_strategy", "macro_sql": "{% macro snapshot_check_strategy(node, snapshotted_rel, current_rel, config, target_exists) %}\n {% set check_cols_config = config['check_cols'] %}\n {% set primary_key = config['unique_key'] %}\n {% set invalidate_hard_deletes = config.get('invalidate_hard_deletes', false) %}\n \n {% set select_current_time -%}\n select {{ snapshot_get_time() }} as snapshot_start\n {%- endset %}\n\n {#-- don't access the column by name, to avoid dealing with casing issues on snowflake #}\n {%- set now = run_query(select_current_time)[0][0] -%}\n {% if now is none or now is undefined -%}\n {%- do exceptions.raise_compiler_error('Could not get a snapshot start time from the database') -%}\n {%- endif %}\n {% set updated_at = snapshot_string_as_time(now) %}\n\n {% set column_added = false %}\n\n {% if check_cols_config == 'all' %}\n {% set column_added, check_cols = snapshot_check_all_get_existing_columns(node, target_exists) %}\n {% elif check_cols_config is iterable and (check_cols_config | length) > 0 %}\n {% set check_cols = check_cols_config %}\n {% else %}\n {% do 
exceptions.raise_compiler_error(\"Invalid value for 'check_cols': \" ~ check_cols_config) %}\n {% endif %}\n\n {%- set row_changed_expr -%}\n (\n {%- if column_added -%}\n TRUE\n {%- else -%}\n {%- for col in check_cols -%}\n {{ snapshotted_rel }}.{{ col }} != {{ current_rel }}.{{ col }}\n or\n (\n (({{ snapshotted_rel }}.{{ col }} is null) and not ({{ current_rel }}.{{ col }} is null))\n or\n ((not {{ snapshotted_rel }}.{{ col }} is null) and ({{ current_rel }}.{{ col }} is null))\n )\n {%- if not loop.last %} or {% endif -%}\n {%- endfor -%}\n {%- endif -%}\n )\n {%- endset %}\n\n {% set scd_id_expr = snapshot_hash_arguments([primary_key, updated_at]) %}\n\n {% do return({\n \"unique_key\": primary_key,\n \"updated_at\": updated_at,\n \"row_changed\": row_changed_expr,\n \"scd_id\": scd_id_expr,\n \"invalidate_hard_deletes\": invalidate_hard_deletes\n }) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_columns": {"unique_id": "macro.dbt.create_columns", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "create_columns", "macro_sql": "{% macro create_columns(relation, columns) %}\n {{ adapter.dispatch('create_columns')(relation, columns) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__create_columns": {"unique_id": "macro.dbt.default__create_columns", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": 
"macros/materializations/snapshot/snapshot.sql", "name": "default__create_columns", "macro_sql": "{% macro default__create_columns(relation, columns) %}\n {% for column in columns %}\n {% call statement() %}\n alter table {{ relation }} add column \"{{ column.name }}\" {{ column.data_type }};\n {% endcall %}\n {% endfor %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.post_snapshot": {"unique_id": "macro.dbt.post_snapshot", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "post_snapshot", "macro_sql": "{% macro post_snapshot(staging_relation) %}\n {{ adapter.dispatch('post_snapshot')(staging_relation) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__post_snapshot": {"unique_id": "macro.dbt.default__post_snapshot", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "default__post_snapshot", "macro_sql": "{% macro default__post_snapshot(staging_relation) %}\n {# no-op #}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.snapshot_staging_table": {"unique_id": "macro.dbt.snapshot_staging_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "snapshot_staging_table", "macro_sql": "{% macro snapshot_staging_table(strategy, source_sql, target_relation) -%}\n\n with snapshot_query as (\n\n {{ source_sql }}\n\n ),\n\n snapshotted_data as (\n\n select *,\n {{ strategy.unique_key }} as dbt_unique_key\n\n from {{ target_relation }}\n where dbt_valid_to is null\n\n ),\n\n insertions_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to,\n {{ strategy.scd_id }} as dbt_scd_id\n\n from snapshot_query\n ),\n\n updates_source_data as (\n\n select\n *,\n {{ strategy.unique_key }} as dbt_unique_key,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n {{ strategy.updated_at }} as dbt_valid_to\n\n from snapshot_query\n ),\n\n {%- if strategy.invalidate_hard_deletes %}\n\n deletes_source_data as (\n\n select \n *,\n {{ strategy.unique_key }} as dbt_unique_key\n from snapshot_query\n ),\n {% endif %}\n\n insertions as (\n\n select\n 'insert' as dbt_change_type,\n source_data.*\n\n from insertions_source_data as source_data\n left outer join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where snapshotted_data.dbt_unique_key is null\n or (\n snapshotted_data.dbt_unique_key is not null\n and (\n {{ strategy.row_changed }}\n )\n )\n\n ),\n\n updates as (\n\n select\n 'update' as dbt_change_type,\n source_data.*,\n snapshotted_data.dbt_scd_id\n\n from updates_source_data as source_data\n join snapshotted_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where (\n {{ strategy.row_changed }}\n )\n )\n\n {%- if strategy.invalidate_hard_deletes -%}\n ,\n\n deletes as (\n \n select\n 'delete' as 
dbt_change_type,\n source_data.*,\n {{ snapshot_get_time() }} as dbt_valid_from,\n {{ snapshot_get_time() }} as dbt_updated_at,\n {{ snapshot_get_time() }} as dbt_valid_to,\n snapshotted_data.dbt_scd_id\n \n from snapshotted_data\n left join deletes_source_data as source_data on snapshotted_data.dbt_unique_key = source_data.dbt_unique_key\n where source_data.dbt_unique_key is null\n )\n {%- endif %}\n\n select * from insertions\n union all\n select * from updates\n {%- if strategy.invalidate_hard_deletes %}\n union all\n select * from deletes\n {%- endif %}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.build_snapshot_table": {"unique_id": "macro.dbt.build_snapshot_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "build_snapshot_table", "macro_sql": "{% macro build_snapshot_table(strategy, sql) %}\n\n select *,\n {{ strategy.scd_id }} as dbt_scd_id,\n {{ strategy.updated_at }} as dbt_updated_at,\n {{ strategy.updated_at }} as dbt_valid_from,\n nullif({{ strategy.updated_at }}, {{ strategy.updated_at }}) as dbt_valid_to\n from (\n {{ sql }}\n ) sbq\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_or_create_relation": {"unique_id": "macro.dbt.get_or_create_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "get_or_create_relation", "macro_sql": "{% 
macro get_or_create_relation(database, schema, identifier, type) %}\n {%- set target_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) %}\n\n {% if target_relation %}\n {% do return([true, target_relation]) %}\n {% endif %}\n\n {%- set new_relation = api.Relation.create(\n database=database,\n schema=schema,\n identifier=identifier,\n type=type\n ) -%}\n {% do return([false, new_relation]) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.build_snapshot_staging_table": {"unique_id": "macro.dbt.build_snapshot_staging_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "build_snapshot_staging_table", "macro_sql": "{% macro build_snapshot_staging_table(strategy, sql, target_relation) %}\n {% set tmp_relation = make_temp_relation(target_relation) %}\n\n {% set select = snapshot_staging_table(strategy, sql, target_relation) %}\n\n {% call statement('build_snapshot_staging_relation') %}\n {{ create_table_as(True, tmp_relation, select) }}\n {% endcall %}\n\n {% do return(tmp_relation) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.materialization_snapshot_default": {"unique_id": "macro.dbt.materialization_snapshot_default", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/snapshot/snapshot.sql", "original_file_path": "macros/materializations/snapshot/snapshot.sql", "name": "materialization_snapshot_default", "macro_sql": "{% 
materialization snapshot, default %}\n {%- set config = model['config'] -%}\n\n {%- set target_table = model.get('alias', model.get('name')) -%}\n\n {%- set strategy_name = config.get('strategy') -%}\n {%- set unique_key = config.get('unique_key') %}\n\n {% if not adapter.check_schema_exists(model.database, model.schema) %}\n {% do create_schema(model.database, model.schema) %}\n {% endif %}\n\n {% set target_relation_exists, target_relation = get_or_create_relation(\n database=model.database,\n schema=model.schema,\n identifier=target_table,\n type='table') -%}\n\n {%- if not target_relation.is_table -%}\n {% do exceptions.relation_wrong_type(target_relation, 'table') %}\n {%- endif -%}\n\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set strategy_macro = strategy_dispatch(strategy_name) %}\n {% set strategy = strategy_macro(model, \"snapshotted_data\", \"source_data\", config, target_relation_exists) %}\n\n {% if not target_relation_exists %}\n\n {% set build_sql = build_snapshot_table(strategy, model['compiled_sql']) %}\n {% set final_sql = create_table_as(False, target_relation, build_sql) %}\n\n {% else %}\n\n {{ adapter.valid_snapshot_target(target_relation) }}\n\n {% set staging_table = build_snapshot_staging_table(strategy, sql, target_relation) %}\n\n -- this may no-op if the database does not require column expansion\n {% do adapter.expand_target_column_types(from_relation=staging_table,\n to_relation=target_relation) %}\n\n {% set missing_columns = adapter.get_missing_columns(staging_table, target_relation)\n | rejectattr('name', 'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% do create_columns(target_relation, missing_columns) %}\n\n {% set source_columns = adapter.get_columns_in_relation(staging_table)\n | rejectattr('name', 
'equalto', 'dbt_change_type')\n | rejectattr('name', 'equalto', 'DBT_CHANGE_TYPE')\n | rejectattr('name', 'equalto', 'dbt_unique_key')\n | rejectattr('name', 'equalto', 'DBT_UNIQUE_KEY')\n | list %}\n\n {% set quoted_source_columns = [] %}\n {% for column in source_columns %}\n {% do quoted_source_columns.append(adapter.quote(column.name)) %}\n {% endfor %}\n\n {% set final_sql = snapshot_merge_sql(\n target = target_relation,\n source = staging_table,\n insert_cols = quoted_source_columns\n )\n %}\n\n {% endif %}\n\n {% call statement('main') %}\n {{ final_sql }}\n {% endcall %}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {% if staging_table is defined %}\n {% do post_snapshot(staging_table) %}\n {% endif %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_csv_table": {"unique_id": "macro.dbt.create_csv_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "create_csv_table", "macro_sql": "{% macro create_csv_table(model, agate_table) -%}\n {{ adapter.dispatch('create_csv_table')(model, agate_table) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.reset_csv_table": {"unique_id": "macro.dbt.reset_csv_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "reset_csv_table", "macro_sql": "{% macro reset_csv_table(model, full_refresh, old_relation, agate_table) -%}\n {{ adapter.dispatch('reset_csv_table')(model, full_refresh, old_relation, agate_table) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.load_csv_rows": {"unique_id": "macro.dbt.load_csv_rows", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "load_csv_rows", "macro_sql": "{% macro load_csv_rows(model, agate_table) -%}\n {{ adapter.dispatch('load_csv_rows')(model, agate_table) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__create_csv_table": {"unique_id": "macro.dbt.default__create_csv_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "default__create_csv_table", "macro_sql": "{% macro default__create_csv_table(model, agate_table) %}\n {%- set column_override = model['config'].get('column_types', {}) -%}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n\n {% set sql %}\n create table {{ this.render() }} (\n {%- for col_name in agate_table.column_names -%}\n {%- set inferred_type = adapter.convert_type(agate_table, loop.index0) -%}\n {%- set type = column_override.get(col_name, inferred_type) -%}\n {%- set column_name = (col_name | 
string) -%}\n {{ adapter.quote_seed_column(column_name, quote_seed_column) }} {{ type }} {%- if not loop.last -%}, {%- endif -%}\n {%- endfor -%}\n )\n {% endset %}\n\n {% call statement('_') -%}\n {{ sql }}\n {%- endcall %}\n\n {{ return(sql) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__reset_csv_table": {"unique_id": "macro.dbt.default__reset_csv_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "default__reset_csv_table", "macro_sql": "{% macro default__reset_csv_table(model, full_refresh, old_relation, agate_table) %}\n {% set sql = \"\" %}\n {% if full_refresh %}\n {{ adapter.drop_relation(old_relation) }}\n {% set sql = create_csv_table(model, agate_table) %}\n {% else %}\n {{ adapter.truncate_relation(old_relation) }}\n {% set sql = \"truncate table \" ~ old_relation %}\n {% endif %}\n\n {{ return(sql) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_seed_column_quoted_csv": {"unique_id": "macro.dbt.get_seed_column_quoted_csv", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "get_seed_column_quoted_csv", "macro_sql": "{% macro get_seed_column_quoted_csv(model, column_names) %}\n {%- set quote_seed_column = model['config'].get('quote_columns', None) -%}\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote_seed_column(col, 
quote_seed_column)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.basic_load_csv_rows": {"unique_id": "macro.dbt.basic_load_csv_rows", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "basic_load_csv_rows", "macro_sql": "{% macro basic_load_csv_rows(model, batch_size, agate_table) %}\n {% set cols_sql = get_seed_column_quoted_csv(model, agate_table.column_names) %}\n {% set bindings = [] %}\n\n {% set statements = [] %}\n\n {% for chunk in agate_table.rows | batch(batch_size) %}\n {% set bindings = [] %}\n\n {% for row in chunk %}\n {% do bindings.extend(row) %}\n {% endfor %}\n\n {% set sql %}\n insert into {{ this.render() }} ({{ cols_sql }}) values\n {% for row in chunk -%}\n ({%- for column in agate_table.column_names -%}\n %s\n {%- if not loop.last%},{%- endif %}\n {%- endfor -%})\n {%- if not loop.last%},{%- endif %}\n {%- endfor %}\n {% endset %}\n\n {% do adapter.add_query(sql, bindings=bindings, abridge_sql_log=True) %}\n\n {% if loop.index0 == 0 %}\n {% do statements.append(sql) %}\n {% endif %}\n {% endfor %}\n\n {# Return SQL so we can render it out into the compiled files #}\n {{ return(statements[0]) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__load_csv_rows": {"unique_id": "macro.dbt.default__load_csv_rows", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "default__load_csv_rows", "macro_sql": "{% macro default__load_csv_rows(model, agate_table) %}\n {{ return(basic_load_csv_rows(model, 10000, agate_table) )}}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.materialization_seed_default": {"unique_id": "macro.dbt.materialization_seed_default", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/seed/seed.sql", "original_file_path": "macros/materializations/seed/seed.sql", "name": "materialization_seed_default", "macro_sql": "{% materialization seed, default %}\n\n {%- set identifier = model['alias'] -%}\n {%- set full_refresh_mode = (should_full_refresh()) -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n\n {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set agate_table = load_agate_table() -%}\n {%- do store_result('agate_table', response='OK', agate_table=agate_table) -%}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% set create_table_sql = \"\" %}\n {% if exists_as_view %}\n {{ exceptions.raise_compiler_error(\"Cannot seed to '{}', it is a view\".format(old_relation)) }}\n {% elif exists_as_table %}\n {% set create_table_sql = reset_csv_table(model, full_refresh_mode, old_relation, agate_table) %}\n {% else %}\n {% set create_table_sql = create_csv_table(model, agate_table) %}\n {% endif %}\n\n {% set code = 'CREATE' if full_refresh_mode else 'INSERT' %}\n {% 
set rows_affected = (agate_table.rows | length) %}\n {% set sql = load_csv_rows(model, agate_table) %}\n\n {% call noop_statement('main', code ~ ' ' ~ rows_affected, code, rows_affected) %}\n {{ create_table_sql }};\n -- dbt seed --\n {{ sql }}\n {% endcall %}\n\n {% set target_relation = this.incorporate(type='table') %}\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.incremental_upsert": {"unique_id": "macro.dbt.incremental_upsert", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/incremental/helpers.sql", "original_file_path": "macros/materializations/incremental/helpers.sql", "name": "incremental_upsert", "macro_sql": "{% macro incremental_upsert(tmp_relation, target_relation, unique_key=none, statement_name=\"main\") %}\n {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%}\n {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}\n\n {%- if unique_key is not none -%}\n delete\n from {{ target_relation }}\n where ({{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ tmp_relation }}\n );\n {%- endif %}\n\n insert into {{ target_relation }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv }}\n from {{ tmp_relation }}\n );\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.materialization_incremental_default": {"unique_id": 
"macro.dbt.materialization_incremental_default", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/incremental/incremental.sql", "original_file_path": "macros/materializations/incremental/incremental.sql", "name": "materialization_incremental_default", "macro_sql": "{% materialization incremental, default -%}\n\n {% set unique_key = config.get('unique_key') %}\n\n {% set target_relation = this.incorporate(type='table') %}\n {% set existing_relation = load_relation(this) %}\n {% set tmp_relation = make_temp_relation(this) %}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n {% set to_drop = [] %}\n {% if existing_relation is none %}\n {% set build_sql = create_table_as(False, target_relation, sql) %}\n {% elif existing_relation.is_view or should_full_refresh() %}\n {#-- Make sure the backup doesn't exist so we don't encounter issues with the rename below #}\n {% set backup_identifier = existing_relation.identifier ~ \"__dbt_backup\" %}\n {% set backup_relation = existing_relation.incorporate(path={\"identifier\": backup_identifier}) %}\n {% do adapter.drop_relation(backup_relation) %}\n\n {% do adapter.rename_relation(target_relation, backup_relation) %}\n {% set build_sql = create_table_as(False, target_relation, sql) %}\n {% do to_drop.append(backup_relation) %}\n {% else %}\n {% set tmp_relation = make_temp_relation(target_relation) %}\n {% do run_query(create_table_as(True, tmp_relation, sql)) %}\n {% do adapter.expand_target_column_types(\n from_relation=tmp_relation,\n to_relation=target_relation) %}\n {% set build_sql = incremental_upsert(tmp_relation, target_relation, unique_key=unique_key) %}\n {% endif %}\n\n {% call statement(\"main\") %}\n {{ build_sql }}\n {% endcall %}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, 
inside_transaction=True) }}\n\n -- `COMMIT` happens here\n {% do adapter.commit() %}\n\n {% for rel in to_drop %}\n {% do adapter.drop_relation(rel) %}\n {% endfor %}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_merge_sql": {"unique_id": "macro.dbt.get_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "get_merge_sql", "macro_sql": "{% macro get_merge_sql(target, source, unique_key, dest_columns, predicates=none) -%}\n {{ adapter.dispatch('get_merge_sql')(target, source, unique_key, dest_columns, predicates) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_delete_insert_merge_sql": {"unique_id": "macro.dbt.get_delete_insert_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "get_delete_insert_merge_sql", "macro_sql": "{% macro get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n {{ adapter.dispatch('get_delete_insert_merge_sql')(target, source, unique_key, dest_columns) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_insert_overwrite_merge_sql": {"unique_id": 
"macro.dbt.get_insert_overwrite_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "get_insert_overwrite_merge_sql", "macro_sql": "{% macro get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header=false) -%}\n {{ adapter.dispatch('get_insert_overwrite_merge_sql')(target, source, dest_columns, predicates, include_sql_header) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__get_merge_sql": {"unique_id": "macro.dbt.default__get_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "default__get_merge_sql", "macro_sql": "{% macro default__get_merge_sql(target, source, unique_key, dest_columns, predicates) -%}\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {% if unique_key %}\n {% set unique_key_match %}\n DBT_INTERNAL_SOURCE.{{ unique_key }} = DBT_INTERNAL_DEST.{{ unique_key }}\n {% endset %}\n {% do predicates.append(unique_key_match) %}\n {% else %}\n {% do predicates.append('FALSE') %}\n {% endif %}\n\n {{ sql_header if sql_header is not none }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on {{ predicates | join(' and ') }}\n\n {% if unique_key %}\n when matched then update set\n {% for column in dest_columns -%}\n {{ adapter.quote(column.name) }} = 
DBT_INTERNAL_SOURCE.{{ adapter.quote(column.name) }}\n {%- if not loop.last %}, {%- endif %}\n {%- endfor %}\n {% endif %}\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_quoted_csv": {"unique_id": "macro.dbt.get_quoted_csv", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "get_quoted_csv", "macro_sql": "{% macro get_quoted_csv(column_names) %}\n {% set quoted = [] %}\n {% for col in column_names -%}\n {%- do quoted.append(adapter.quote(col)) -%}\n {%- endfor %}\n\n {%- set dest_cols_csv = quoted | join(', ') -%}\n {{ return(dest_cols_csv) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.common_get_delete_insert_merge_sql": {"unique_id": "macro.dbt.common_get_delete_insert_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "common_get_delete_insert_merge_sql", "macro_sql": "{% macro common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n\n {% if unique_key is not none %}\n delete from {{ target }}\n where ({{ unique_key }}) in (\n select ({{ unique_key }})\n from {{ source }}\n );\n {% endif %}\n\n insert into {{ target }} ({{ dest_cols_csv }})\n (\n select {{ dest_cols_csv 
}}\n from {{ source }}\n );\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__get_delete_insert_merge_sql": {"unique_id": "macro.dbt.default__get_delete_insert_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "default__get_delete_insert_merge_sql", "macro_sql": "{% macro default__get_delete_insert_merge_sql(target, source, unique_key, dest_columns) -%}\n {{ common_get_delete_insert_merge_sql(target, source, unique_key, dest_columns) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__get_insert_overwrite_merge_sql": {"unique_id": "macro.dbt.default__get_insert_overwrite_merge_sql", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/common/merge.sql", "original_file_path": "macros/materializations/common/merge.sql", "name": "default__get_insert_overwrite_merge_sql", "macro_sql": "{% macro default__get_insert_overwrite_merge_sql(target, source, dest_columns, predicates, include_sql_header) -%}\n {%- set predicates = [] if predicates is none else [] + predicates -%}\n {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute=\"name\")) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none and include_sql_header }}\n\n merge into {{ target }} as DBT_INTERNAL_DEST\n using {{ source }} as DBT_INTERNAL_SOURCE\n on FALSE\n\n when not matched by source\n {% if predicates %} and {{ predicates | join(' and 
') }} {% endif %}\n then delete\n\n when not matched then insert\n ({{ dest_cols_csv }})\n values\n ({{ dest_cols_csv }})\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.materialization_table_default": {"unique_id": "macro.dbt.materialization_table_default", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/table/table.sql", "original_file_path": "macros/materializations/table/table.sql", "name": "materialization_table_default", "macro_sql": "{% materialization table, default %}\n {%- set identifier = model['alias'] -%}\n {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}\n {%- set backup_identifier = model['name'] + '__dbt_backup' -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n {%- set target_relation = api.Relation.create(identifier=identifier,\n schema=schema,\n database=database,\n type='table') -%}\n {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,\n schema=schema,\n database=database,\n type='table') -%}\n\n /*\n See ../view/view.sql for more information about this relation.\n */\n {%- set backup_relation_type = 'table' if old_relation is none else old_relation.type -%}\n {%- set backup_relation = api.Relation.create(identifier=backup_identifier,\n schema=schema,\n database=database,\n type=backup_relation_type) -%}\n\n {%- set exists_as_table = (old_relation is not none and old_relation.is_table) -%}\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n\n -- drop the temp relations if they exists for some reason\n {{ adapter.drop_relation(intermediate_relation) }}\n {{ adapter.drop_relation(backup_relation) }}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- 
`BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ create_table_as(False, intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n {% if old_relation is not none %}\n {{ adapter.rename_relation(target_relation, backup_relation) }}\n {% endif %}\n\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {% do persist_docs(target_relation, model) %}\n\n -- `COMMIT` happens here\n {{ adapter.commit() }}\n\n -- finally, drop the existing/backup relation after the commit\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n{% endmaterialization %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.materialization_view_default": {"unique_id": "macro.dbt.materialization_view_default", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/view/view.sql", "original_file_path": "macros/materializations/view/view.sql", "name": "materialization_view_default", "macro_sql": "{%- materialization view, default -%}\n\n {%- set identifier = model['alias'] -%}\n {%- set tmp_identifier = model['name'] + '__dbt_tmp' -%}\n {%- set backup_identifier = model['name'] + '__dbt_backup' -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n {%- set target_relation = api.Relation.create(identifier=identifier, schema=schema, database=database,\n type='view') -%}\n {%- set intermediate_relation = api.Relation.create(identifier=tmp_identifier,\n schema=schema, database=database, type='view') -%}\n\n /*\n This relation (probably) doesn't exist yet. 
If it does exist, it's a leftover from\n a previous run, and we're going to try to drop it immediately. At the end of this\n materialization, we're going to rename the \"old_relation\" to this identifier,\n and then we're going to drop it. In order to make sure we run the correct one of:\n - drop view ...\n - drop table ...\n\n We need to set the type of this relation to be the type of the old_relation, if it exists,\n or else \"view\" as a sane default if it does not. Note that if the old_relation does not\n exist, then there is nothing to move out of the way and subsequentally drop. In that case,\n this relation will be effectively unused.\n */\n {%- set backup_relation_type = 'view' if old_relation is none else old_relation.type -%}\n {%- set backup_relation = api.Relation.create(identifier=backup_identifier,\n schema=schema, database=database,\n type=backup_relation_type) -%}\n\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n\n -- drop the temp relations if they exists for some reason\n {{ adapter.drop_relation(intermediate_relation) }}\n {{ adapter.drop_relation(backup_relation) }}\n\n -- `BEGIN` happens here:\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- build model\n {% call statement('main') -%}\n {{ create_view_as(intermediate_relation, sql) }}\n {%- endcall %}\n\n -- cleanup\n -- move the existing view out of the way\n {% if old_relation is not none %}\n {{ adapter.rename_relation(target_relation, backup_relation) }}\n {% endif %}\n {{ adapter.rename_relation(intermediate_relation, target_relation) }}\n\n {% do persist_docs(target_relation, model) %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {{ drop_relation_if_exists(backup_relation) }}\n\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n\n {{ return({'relations': [target_relation]}) }}\n\n{%- endmaterialization -%}", "resource_type": "macro", 
"tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.handle_existing_table": {"unique_id": "macro.dbt.handle_existing_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/view/create_or_replace_view.sql", "original_file_path": "macros/materializations/view/create_or_replace_view.sql", "name": "handle_existing_table", "macro_sql": "{% macro handle_existing_table(full_refresh, old_relation) %}\n {{ adapter.dispatch(\"handle_existing_table\", packages=['dbt'])(full_refresh, old_relation) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__handle_existing_table": {"unique_id": "macro.dbt.default__handle_existing_table", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/view/create_or_replace_view.sql", "original_file_path": "macros/materializations/view/create_or_replace_view.sql", "name": "default__handle_existing_table", "macro_sql": "{% macro default__handle_existing_table(full_refresh, old_relation) %}\n {{ adapter.drop_relation(old_relation) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_or_replace_view": {"unique_id": "macro.dbt.create_or_replace_view", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/materializations/view/create_or_replace_view.sql", "original_file_path": "macros/materializations/view/create_or_replace_view.sql", "name": "create_or_replace_view", 
"macro_sql": "{% macro create_or_replace_view(run_outside_transaction_hooks=True) %}\n {%- set identifier = model['alias'] -%}\n\n {%- set old_relation = adapter.get_relation(database=database, schema=schema, identifier=identifier) -%}\n\n {%- set exists_as_view = (old_relation is not none and old_relation.is_view) -%}\n\n {%- set target_relation = api.Relation.create(\n identifier=identifier, schema=schema, database=database,\n type='view') -%}\n\n {% if run_outside_transaction_hooks %}\n -- no transactions on BigQuery\n {{ run_hooks(pre_hooks, inside_transaction=False) }}\n {% endif %}\n\n -- `BEGIN` happens here on Snowflake\n {{ run_hooks(pre_hooks, inside_transaction=True) }}\n\n -- If there's a table with the same name and we weren't told to full refresh,\n -- that's an error. If we were told to full refresh, drop it. This behavior differs\n -- for Snowflake and BigQuery, so multiple dispatch is used.\n {%- if old_relation is not none and old_relation.is_table -%}\n {{ handle_existing_table(should_full_refresh(), old_relation) }}\n {%- endif -%}\n\n -- build model\n {% call statement('main') -%}\n {{ create_view_as(target_relation, sql) }}\n {%- endcall %}\n\n {{ run_hooks(post_hooks, inside_transaction=True) }}\n\n {{ adapter.commit() }}\n\n {% if run_outside_transaction_hooks %}\n -- No transactions on BigQuery\n {{ run_hooks(post_hooks, inside_transaction=False) }}\n {% endif %}\n\n {{ return({'relations': [target_relation]}) }}\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.generate_alias_name": {"unique_id": "macro.dbt.generate_alias_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/get_custom_alias.sql", "original_file_path": "macros/etc/get_custom_alias.sql", "name": "generate_alias_name", "macro_sql": "{% macro 
generate_alias_name(custom_alias_name=none, node=none) -%}\n\n {%- if custom_alias_name is none -%}\n\n {{ node.name }}\n\n {%- else -%}\n\n {{ custom_alias_name | trim }}\n\n {%- endif -%}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.run_query": {"unique_id": "macro.dbt.run_query", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/query.sql", "original_file_path": "macros/etc/query.sql", "name": "run_query", "macro_sql": "{% macro run_query(sql) %}\n {% call statement(\"run_query_statement\", fetch_result=true, auto_begin=false) %}\n {{ sql }}\n {% endcall %}\n\n {% do return(load_result(\"run_query_statement\").table) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.is_incremental": {"unique_id": "macro.dbt.is_incremental", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/is_incremental.sql", "original_file_path": "macros/etc/is_incremental.sql", "name": "is_incremental", "macro_sql": "{% macro is_incremental() %}\n {#-- do not run introspective queries in parsing #}\n {% if not execute %}\n {{ return(False) }}\n {% else %}\n {% set relation = adapter.get_relation(this.database, this.schema, this.table) %}\n {{ return(relation is not none\n and relation.type == 'table'\n and model.config.materialized == 'incremental'\n and not should_full_refresh()) }}\n {% endif %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.convert_datetime": {"unique_id": 
"macro.dbt.convert_datetime", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/datetime.sql", "original_file_path": "macros/etc/datetime.sql", "name": "convert_datetime", "macro_sql": "{% macro convert_datetime(date_str, date_fmt) %}\n\n {% set error_msg -%}\n The provided partition date '{{ date_str }}' does not match the expected format '{{ date_fmt }}'\n {%- endset %}\n\n {% set res = try_or_compiler_error(error_msg, modules.datetime.datetime.strptime, date_str.strip(), date_fmt) %}\n {{ return(res) }}\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.dates_in_range": {"unique_id": "macro.dbt.dates_in_range", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/datetime.sql", "original_file_path": "macros/etc/datetime.sql", "name": "dates_in_range", "macro_sql": "{% macro dates_in_range(start_date_str, end_date_str=none, in_fmt=\"%Y%m%d\", out_fmt=\"%Y%m%d\") %}\n {% set end_date_str = start_date_str if end_date_str is none else end_date_str %}\n\n {% set start_date = convert_datetime(start_date_str, in_fmt) %}\n {% set end_date = convert_datetime(end_date_str, in_fmt) %}\n\n {% set day_count = (end_date - start_date).days %}\n {% if day_count < 0 %}\n {% set msg -%}\n Partiton start date is after the end date ({{ start_date }}, {{ end_date }})\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg, model) }}\n {% endif %}\n\n {% set date_list = [] %}\n {% for i in range(0, day_count + 1) %}\n {% set the_date = (modules.datetime.timedelta(days=i) + start_date) %}\n {% if not out_fmt %}\n {% set _ = date_list.append(the_date) %}\n {% else %}\n {% set _ = date_list.append(the_date.strftime(out_fmt)) %}\n {% endif %}\n {% endfor %}\n\n {{ 
return(date_list) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.partition_range": {"unique_id": "macro.dbt.partition_range", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/datetime.sql", "original_file_path": "macros/etc/datetime.sql", "name": "partition_range", "macro_sql": "{% macro partition_range(raw_partition_date, date_fmt='%Y%m%d') %}\n {% set partition_range = (raw_partition_date | string).split(\",\") %}\n\n {% if (partition_range | length) == 1 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = none %}\n {% elif (partition_range | length) == 2 %}\n {% set start_date = partition_range[0] %}\n {% set end_date = partition_range[1] %}\n {% else %}\n {{ exceptions.raise_compiler_error(\"Invalid partition time. Expected format: {Start Date}[,{End Date}]. 
Got: \" ~ raw_partition_date) }}\n {% endif %}\n\n {{ return(dates_in_range(start_date, end_date, in_fmt=date_fmt)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.py_current_timestring": {"unique_id": "macro.dbt.py_current_timestring", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/datetime.sql", "original_file_path": "macros/etc/datetime.sql", "name": "py_current_timestring", "macro_sql": "{% macro py_current_timestring() %}\n {% set dt = modules.datetime.datetime.now() %}\n {% do return(dt.strftime(\"%Y%m%d%H%M%S%f\")) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.generate_schema_name": {"unique_id": "macro.dbt.generate_schema_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/get_custom_schema.sql", "original_file_path": "macros/etc/get_custom_schema.sql", "name": "generate_schema_name", "macro_sql": "{% macro generate_schema_name(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if custom_schema_name is none -%}\n\n {{ default_schema }}\n\n {%- else -%}\n\n {{ default_schema }}_{{ custom_schema_name | trim }}\n\n {%- endif -%}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.generate_schema_name_for_env": {"unique_id": "macro.dbt.generate_schema_name_for_env", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/etc/get_custom_schema.sql", "original_file_path": "macros/etc/get_custom_schema.sql", "name": "generate_schema_name_for_env", "macro_sql": "{% macro generate_schema_name_for_env(custom_schema_name, node) -%}\n\n {%- set default_schema = target.schema -%}\n {%- if target.name == 'prod' and custom_schema_name is not none -%}\n\n {{ custom_schema_name | trim }}\n\n {%- else -%}\n\n {{ default_schema }}\n\n {%- endif -%}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.generate_database_name": {"unique_id": "macro.dbt.generate_database_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/get_custom_database.sql", "original_file_path": "macros/etc/get_custom_database.sql", "name": "generate_database_name", "macro_sql": "{% macro generate_database_name(custom_database_name=none, node=none) -%}\n {% do return(adapter.dispatch('generate_database_name')(custom_database_name, node)) %}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": ["macro.dbt.default__generate_database_name"]}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__generate_database_name": {"unique_id": "macro.dbt.default__generate_database_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/etc/get_custom_database.sql", "original_file_path": "macros/etc/get_custom_database.sql", "name": "default__generate_database_name", "macro_sql": "{% macro default__generate_database_name(custom_database_name=none, node=none) -%}\n {%- set default_database = target.database -%}\n {%- if custom_database_name is none -%}\n\n {{ default_database }}\n\n {%- else -%}\n\n {{ custom_database_name 
}}\n\n {%- endif -%}\n\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_columns_in_query": {"unique_id": "macro.dbt.get_columns_in_query", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "get_columns_in_query", "macro_sql": "{% macro get_columns_in_query(select_sql) -%}\n {{ return(adapter.dispatch('get_columns_in_query')(select_sql)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__get_columns_in_query": {"unique_id": "macro.dbt.default__get_columns_in_query", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__get_columns_in_query", "macro_sql": "{% macro default__get_columns_in_query(select_sql) %}\n {% call statement('get_columns_in_query', fetch_result=True, auto_begin=False) -%}\n select * from (\n {{ select_sql }}\n ) as __dbt_sbq\n where false\n limit 0\n {% endcall %}\n\n {{ return(load_result('get_columns_in_query').table.columns | map(attribute='name') | list) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_schema": {"unique_id": "macro.dbt.create_schema", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", 
"name": "create_schema", "macro_sql": "{% macro create_schema(relation) -%}\n {{ adapter.dispatch('create_schema')(relation) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__create_schema": {"unique_id": "macro.dbt.default__create_schema", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__create_schema", "macro_sql": "{% macro default__create_schema(relation) -%}\n {%- call statement('create_schema') -%}\n create schema if not exists {{ relation.without_identifier() }}\n {% endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.drop_schema": {"unique_id": "macro.dbt.drop_schema", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "drop_schema", "macro_sql": "{% macro drop_schema(relation) -%}\n {{ adapter.dispatch('drop_schema')(relation) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__drop_schema": {"unique_id": "macro.dbt.default__drop_schema", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__drop_schema", "macro_sql": "{% macro default__drop_schema(relation) -%}\n {%- call 
statement('drop_schema') -%}\n drop schema if exists {{ relation.without_identifier() }} cascade\n {% endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_table_as": {"unique_id": "macro.dbt.create_table_as", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "create_table_as", "macro_sql": "{% macro create_table_as(temporary, relation, sql) -%}\n {{ adapter.dispatch('create_table_as')(temporary, relation, sql) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__create_table_as": {"unique_id": "macro.dbt.default__create_table_as", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__create_table_as", "macro_sql": "{% macro default__create_table_as(temporary, relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n\n create {% if temporary: -%}temporary{%- endif %} table\n {{ relation.include(database=(not temporary), schema=(not temporary)) }}\n as (\n {{ sql }}\n );\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.create_view_as": {"unique_id": "macro.dbt.create_view_as", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "create_view_as", "macro_sql": "{% macro create_view_as(relation, sql) -%}\n {{ adapter.dispatch('create_view_as')(relation, sql) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__create_view_as": {"unique_id": "macro.dbt.default__create_view_as", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__create_view_as", "macro_sql": "{% macro default__create_view_as(relation, sql) -%}\n {%- set sql_header = config.get('sql_header', none) -%}\n\n {{ sql_header if sql_header is not none }}\n create view {{ relation }} as (\n {{ sql }}\n );\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_catalog": {"unique_id": "macro.dbt.get_catalog", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "get_catalog", "macro_sql": "{% macro get_catalog(information_schema, schemas) -%}\n {{ return(adapter.dispatch('get_catalog')(information_schema, schemas)) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": ["macro.dbt_snowflake.snowflake__get_catalog"]}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__get_catalog": {"unique_id": "macro.dbt.default__get_catalog", "package_name": "dbt", "root_path": 
"/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__get_catalog", "macro_sql": "{% macro default__get_catalog(information_schema, schemas) -%}\n\n {% set typename = adapter.type() %}\n {% set msg -%}\n get_catalog not implemented for {{ typename }}\n {%- endset %}\n\n {{ exceptions.raise_compiler_error(msg) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.get_columns_in_relation": {"unique_id": "macro.dbt.get_columns_in_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "get_columns_in_relation", "macro_sql": "{% macro get_columns_in_relation(relation) -%}\n {{ return(adapter.dispatch('get_columns_in_relation')(relation)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.sql_convert_columns_in_relation": {"unique_id": "macro.dbt.sql_convert_columns_in_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "sql_convert_columns_in_relation", "macro_sql": "{% macro sql_convert_columns_in_relation(table) -%}\n {% set columns = [] %}\n {% for row in table %}\n {% do columns.append(api.Column(*row)) %}\n {% endfor %}\n {{ return(columns) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, 
"arguments": []}, "macro.dbt.default__get_columns_in_relation": {"unique_id": "macro.dbt.default__get_columns_in_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__get_columns_in_relation", "macro_sql": "{% macro default__get_columns_in_relation(relation) -%}\n {{ exceptions.raise_not_implemented(\n 'get_columns_in_relation macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.alter_column_type": {"unique_id": "macro.dbt.alter_column_type", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "alter_column_type", "macro_sql": "{% macro alter_column_type(relation, column_name, new_column_type) -%}\n {{ return(adapter.dispatch('alter_column_type')(relation, column_name, new_column_type)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.alter_column_comment": {"unique_id": "macro.dbt.alter_column_comment", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "alter_column_comment", "macro_sql": "{% macro alter_column_comment(relation, column_dict) -%}\n {{ return(adapter.dispatch('alter_column_comment')(relation, column_dict)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, 
"description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__alter_column_comment": {"unique_id": "macro.dbt.default__alter_column_comment", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__alter_column_comment", "macro_sql": "{% macro default__alter_column_comment(relation, column_dict) -%}\n {{ exceptions.raise_not_implemented(\n 'alter_column_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.alter_relation_comment": {"unique_id": "macro.dbt.alter_relation_comment", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "alter_relation_comment", "macro_sql": "{% macro alter_relation_comment(relation, relation_comment) -%}\n {{ return(adapter.dispatch('alter_relation_comment')(relation, relation_comment)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__alter_relation_comment": {"unique_id": "macro.dbt.default__alter_relation_comment", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__alter_relation_comment", "macro_sql": "{% macro default__alter_relation_comment(relation, relation_comment) -%}\n {{ exceptions.raise_not_implemented(\n 
'alter_relation_comment macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.persist_docs": {"unique_id": "macro.dbt.persist_docs", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "persist_docs", "macro_sql": "{% macro persist_docs(relation, model, for_relation=true, for_columns=true) -%}\n {{ return(adapter.dispatch('persist_docs')(relation, model, for_relation, for_columns)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__persist_docs": {"unique_id": "macro.dbt.default__persist_docs", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__persist_docs", "macro_sql": "{% macro default__persist_docs(relation, model, for_relation, for_columns) -%}\n {% if for_relation and config.persist_relation_docs() and model.description %}\n {% do run_query(alter_relation_comment(relation, model.description)) %}\n {% endif %}\n\n {% if for_columns and config.persist_column_docs() and model.columns %}\n {% do run_query(alter_column_comment(relation, model.columns)) %}\n {% endif %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__alter_column_type": {"unique_id": "macro.dbt.default__alter_column_type", "package_name": "dbt", "root_path": 
"/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__alter_column_type", "macro_sql": "{% macro default__alter_column_type(relation, column_name, new_column_type) -%}\n {#\n 1. Create a new column (w/ temp name and correct type)\n 2. Copy data over to it\n 3. Drop the existing column (cascade!)\n 4. Rename the new column to existing column\n #}\n {%- set tmp_column = column_name + \"__dbt_alter\" -%}\n\n {% call statement('alter_column_type') %}\n alter table {{ relation }} add column {{ adapter.quote(tmp_column) }} {{ new_column_type }};\n update {{ relation }} set {{ adapter.quote(tmp_column) }} = {{ adapter.quote(column_name) }};\n alter table {{ relation }} drop column {{ adapter.quote(column_name) }} cascade;\n alter table {{ relation }} rename column {{ adapter.quote(tmp_column) }} to {{ adapter.quote(column_name) }}\n {% endcall %}\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.drop_relation": {"unique_id": "macro.dbt.drop_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "drop_relation", "macro_sql": "{% macro drop_relation(relation) -%}\n {{ return(adapter.dispatch('drop_relation')(relation)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__drop_relation": {"unique_id": "macro.dbt.default__drop_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__drop_relation", "macro_sql": "{% macro default__drop_relation(relation) -%}\n {% call statement('drop_relation', auto_begin=False) -%}\n drop {{ relation.type }} if exists {{ relation }} cascade\n {%- endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.truncate_relation": {"unique_id": "macro.dbt.truncate_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "truncate_relation", "macro_sql": "{% macro truncate_relation(relation) -%}\n {{ return(adapter.dispatch('truncate_relation')(relation)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__truncate_relation": {"unique_id": "macro.dbt.default__truncate_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__truncate_relation", "macro_sql": "{% macro default__truncate_relation(relation) -%}\n {% call statement('truncate_relation') -%}\n truncate table {{ relation }}\n {%- endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.rename_relation": {"unique_id": "macro.dbt.rename_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": 
"macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "rename_relation", "macro_sql": "{% macro rename_relation(from_relation, to_relation) -%}\n {{ return(adapter.dispatch('rename_relation')(from_relation, to_relation)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__rename_relation": {"unique_id": "macro.dbt.default__rename_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__rename_relation", "macro_sql": "{% macro default__rename_relation(from_relation, to_relation) -%}\n {% set target_name = adapter.quote_as_configured(to_relation.identifier, 'identifier') %}\n {% call statement('rename_relation') -%}\n alter table {{ from_relation }} rename to {{ target_name }}\n {%- endcall %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.information_schema_name": {"unique_id": "macro.dbt.information_schema_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "information_schema_name", "macro_sql": "{% macro information_schema_name(database) %}\n {{ return(adapter.dispatch('information_schema_name')(database)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__information_schema_name": {"unique_id": 
"macro.dbt.default__information_schema_name", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__information_schema_name", "macro_sql": "{% macro default__information_schema_name(database) -%}\n {%- if database -%}\n {{ database }}.INFORMATION_SCHEMA\n {%- else -%}\n INFORMATION_SCHEMA\n {%- endif -%}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.list_schemas": {"unique_id": "macro.dbt.list_schemas", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "list_schemas", "macro_sql": "{% macro list_schemas(database) -%}\n {{ return(adapter.dispatch('list_schemas')(database)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__list_schemas": {"unique_id": "macro.dbt.default__list_schemas", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__list_schemas", "macro_sql": "{% macro default__list_schemas(database) -%}\n {% set sql %}\n select distinct schema_name\n from {{ information_schema_name(database) }}.SCHEMATA\n where catalog_name ilike '{{ database }}'\n {% endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, 
"arguments": []}, "macro.dbt.check_schema_exists": {"unique_id": "macro.dbt.check_schema_exists", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "check_schema_exists", "macro_sql": "{% macro check_schema_exists(information_schema, schema) -%}\n {{ return(adapter.dispatch('check_schema_exists')(information_schema, schema)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__check_schema_exists": {"unique_id": "macro.dbt.default__check_schema_exists", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__check_schema_exists", "macro_sql": "{% macro default__check_schema_exists(information_schema, schema) -%}\n {% set sql -%}\n select count(*)\n from {{ information_schema.replace(information_schema_view='SCHEMATA') }}\n where catalog_name='{{ information_schema.database }}'\n and schema_name='{{ schema }}'\n {%- endset %}\n {{ return(run_query(sql)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.list_relations_without_caching": {"unique_id": "macro.dbt.list_relations_without_caching", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "list_relations_without_caching", "macro_sql": "{% macro list_relations_without_caching(schema_relation) %}\n {{ 
return(adapter.dispatch('list_relations_without_caching')(schema_relation)) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": ["macro.dbt_snowflake.snowflake__list_relations_without_caching"]}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__list_relations_without_caching": {"unique_id": "macro.dbt.default__list_relations_without_caching", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__list_relations_without_caching", "macro_sql": "{% macro default__list_relations_without_caching(schema_relation) %}\n {{ exceptions.raise_not_implemented(\n 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.current_timestamp": {"unique_id": "macro.dbt.current_timestamp", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "current_timestamp", "macro_sql": "{% macro current_timestamp() -%}\n {{ adapter.dispatch('current_timestamp')() }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__current_timestamp": {"unique_id": "macro.dbt.default__current_timestamp", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", 
"name": "default__current_timestamp", "macro_sql": "{% macro default__current_timestamp() -%}\n {{ exceptions.raise_not_implemented(\n 'current_timestamp macro not implemented for adapter '+adapter.type()) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.collect_freshness": {"unique_id": "macro.dbt.collect_freshness", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "collect_freshness", "macro_sql": "{% macro collect_freshness(source, loaded_at_field, filter) %}\n {{ return(adapter.dispatch('collect_freshness')(source, loaded_at_field, filter))}}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__collect_freshness": {"unique_id": "macro.dbt.default__collect_freshness", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__collect_freshness", "macro_sql": "{% macro default__collect_freshness(source, loaded_at_field, filter) %}\n {% call statement('collect_freshness', fetch_result=True, auto_begin=False) -%}\n select\n max({{ loaded_at_field }}) as max_loaded_at,\n {{ current_timestamp() }} as snapshotted_at\n from {{ source }}\n {% if filter %}\n where {{ filter }}\n {% endif %}\n {% endcall %}\n {{ return(load_result('collect_freshness').table) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, 
"macro.dbt.make_temp_relation": {"unique_id": "macro.dbt.make_temp_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "make_temp_relation", "macro_sql": "{% macro make_temp_relation(base_relation, suffix='__dbt_tmp') %}\n {{ return(adapter.dispatch('make_temp_relation')(base_relation, suffix))}}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__make_temp_relation": {"unique_id": "macro.dbt.default__make_temp_relation", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "default__make_temp_relation", "macro_sql": "{% macro default__make_temp_relation(base_relation, suffix) %}\n {% set tmp_identifier = base_relation.identifier ~ suffix %}\n {% set tmp_relation = base_relation.incorporate(\n path={\"identifier\": tmp_identifier}) -%}\n\n {% do return(tmp_relation) %}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.set_sql_header": {"unique_id": "macro.dbt.set_sql_header", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/adapters/common.sql", "original_file_path": "macros/adapters/common.sql", "name": "set_sql_header", "macro_sql": "{% macro set_sql_header(config) -%}\n {{ config.set('sql_header', caller()) }}\n{%- endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, 
"patch_path": null, "arguments": []}, "macro.dbt.default__test_relationships": {"unique_id": "macro.dbt.default__test_relationships", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/relationships.sql", "original_file_path": "macros/schema_tests/relationships.sql", "name": "default__test_relationships", "macro_sql": "{% macro default__test_relationships(model, to, field) %}\n\n{% set column_name = kwargs.get('column_name', kwargs.get('from')) %}\n\n\nselect count(*) as validation_errors\nfrom (\n select {{ column_name }} as id from {{ model }}\n) as child\nleft join (\n select {{ field }} as id from {{ to }}\n) as parent on parent.id = child.id\nwhere child.id is not null\n and parent.id is null\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.test_relationships": {"unique_id": "macro.dbt.test_relationships", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/relationships.sql", "original_file_path": "macros/schema_tests/relationships.sql", "name": "test_relationships", "macro_sql": "{% macro test_relationships(model, to, field) %}\n {% set macro = adapter.dispatch('test_relationships') %}\n {{ macro(model, to, field, **kwargs) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__test_not_null": {"unique_id": "macro.dbt.default__test_not_null", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/not_null.sql", "original_file_path": "macros/schema_tests/not_null.sql", "name": 
"default__test_not_null", "macro_sql": "{% macro default__test_not_null(model) %}\n\n{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}\n\nselect count(*) as validation_errors\nfrom {{ model }}\nwhere {{ column_name }} is null\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.test_not_null": {"unique_id": "macro.dbt.test_not_null", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/not_null.sql", "original_file_path": "macros/schema_tests/not_null.sql", "name": "test_not_null", "macro_sql": "{% macro test_not_null(model) %}\n {% set macro = adapter.dispatch('test_not_null') %}\n {{ macro(model, **kwargs) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__test_unique": {"unique_id": "macro.dbt.default__test_unique", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/unique.sql", "original_file_path": "macros/schema_tests/unique.sql", "name": "default__test_unique", "macro_sql": "{% macro default__test_unique(model) %}\n\n{% set column_name = kwargs.get('column_name', kwargs.get('arg')) %}\n\nselect count(*) as validation_errors\nfrom (\n\n select\n {{ column_name }}\n\n from {{ model }}\n where {{ column_name }} is not null\n group by {{ column_name }}\n having count(*) > 1\n\n) validation_errors\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.test_unique": {"unique_id": "macro.dbt.test_unique", "package_name": "dbt", 
"root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/unique.sql", "original_file_path": "macros/schema_tests/unique.sql", "name": "test_unique", "macro_sql": "{% macro test_unique(model) %}\n {% set macro = adapter.dispatch('test_unique') %}\n {{ macro(model, **kwargs) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.default__test_accepted_values": {"unique_id": "macro.dbt.default__test_accepted_values", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/accepted_values.sql", "original_file_path": "macros/schema_tests/accepted_values.sql", "name": "default__test_accepted_values", "macro_sql": "{% macro default__test_accepted_values(model, values) %}\n\n{% set column_name = kwargs.get('column_name', kwargs.get('field')) %}\n{% set quote_values = kwargs.get('quote', True) %}\n\nwith all_values as (\n\n select distinct\n {{ column_name }} as value_field\n\n from {{ model }}\n\n),\n\nvalidation_errors as (\n\n select\n value_field\n\n from all_values\n where value_field not in (\n {% for value in values -%}\n {% if quote_values -%}\n '{{ value }}'\n {%- else -%}\n {{ value }}\n {%- endif -%}\n {%- if not loop.last -%},{%- endif %}\n {%- endfor %}\n )\n)\n\nselect count(*) as validation_errors\nfrom validation_errors\n\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}, "macro.dbt.test_accepted_values": {"unique_id": "macro.dbt.test_accepted_values", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "macros/schema_tests/accepted_values.sql", 
"original_file_path": "macros/schema_tests/accepted_values.sql", "name": "test_accepted_values", "macro_sql": "{% macro test_accepted_values(model, values) %}\n {% set macro = adapter.dispatch('test_accepted_values') %}\n {{ macro(model, values, **kwargs) }}\n{% endmacro %}", "resource_type": "macro", "tags": [], "depends_on": {"macros": []}, "description": "", "meta": {}, "docs": {"show": true}, "patch_path": null, "arguments": []}}, "docs": {"dbt_demo.table_events": {"unique_id": "dbt_demo.table_events", "package_name": "dbt_demo", "root_path": "/Users/grantseward/GitHub/dbt_demo", "path": "finance/docs.md", "original_file_path": "models/finance/docs.md", "name": "table_events", "block_contents": "This table contains clickstream events from the marketing website.\n\nThe events in this table are recorded by [Snowplow](http://github.com/snowplow/snowplow) and piped into the warehouse on an hourly basis. The following pages of the marketing site are tracked:\n - /\n - /about\n - /team\n - /contact-us"}, "dbt.__overview__": {"unique_id": "dbt.__overview__", "package_name": "dbt", "root_path": "/Users/grantseward/pyenv/dbt/lib/python3.7/site-packages/dbt/include/global_project", "path": "overview.md", "original_file_path": "docs/overview.md", "name": "__overview__", "block_contents": "### Welcome!\n\nWelcome to the auto-generated documentation for your dbt project!\n\n### Navigation\n\nYou can use the `Project` and `Database` navigation tabs on the left side of the window to explore the models\nin your project.\n\n#### Project Tab\nThe `Project` tab mirrors the directory structure of your dbt project. In this tab, you can see all of the\nmodels defined in your dbt project, as well as models imported from dbt packages.\n\n#### Database Tab\nThe `Database` tab also exposes your models, but in a format that looks more like a database explorer. This view\nshows relations (tables and views) grouped into database schemas. 
Note that ephemeral models are _not_ shown\nin this interface, as they do not exist in the database.\n\n### Graph Exploration\nYou can click the blue icon on the bottom-right corner of the page to view the lineage graph of your models.\n\nOn model pages, you'll see the immediate parents and children of the model you're exploring. By clicking the `Expand`\nbutton at the top-right of this lineage pane, you'll be able to see all of the models that are used to build,\nor are built from, the model you're exploring.\n\nOnce expanded, you'll be able to use the `--models` and `--exclude` model selection syntax to filter the\nmodels in the graph. For more information on model selection, check out the [dbt docs](https://docs.getdbt.com/docs/model-selection-syntax).\n\nNote that you can also right-click on models to interactively filter and explore the graph.\n\n---\n\n### More information\n\n- [What is dbt](https://docs.getdbt.com/docs/overview)?\n- Read the [dbt viewpoint](https://docs.getdbt.com/docs/viewpoint)\n- [Installation](https://docs.getdbt.com/docs/installation)\n- Join the [chat](https://community.getdbt.com/) on Slack for live questions and support."}}, "exposures": {}, "selectors": {}, "disabled": [], "parent_map": {"model.dbt_demo.fact_third_party_performance": ["model.dbt_demo.fact_catalog_returns"], "model.dbt_demo.fact_catalog_returns": [], "model.dbt_demo.raw_inventory_value": [], "model.dbt_demo.fact_warehouse_inventory": ["model.dbt_demo.raw_inventory_item_warehouse", "model.dbt_demo.raw_inventory_value"], "model.dbt_demo.raw_inventory_item_warehouse": [], "model.dbt_demo.fact_daily_expenses": ["model.dbt_demo.fact_warehouse_inventory"], "test.dbt_demo.unique_fact_catalog_returns_cr_item_sk": ["model.dbt_demo.fact_catalog_returns"], "test.dbt_demo.not_null_fact_catalog_returns_cr_item_sk": ["model.dbt_demo.fact_catalog_returns"], "test.dbt_demo.unique_fact_third_party_performance_company_id": ["model.dbt_demo.fact_third_party_performance"], 
"test.dbt_demo.not_null_fact_third_party_performance_company_id": ["model.dbt_demo.fact_third_party_performance"], "test.dbt_demo.unique_fact_third_party_performance_company_name": ["model.dbt_demo.fact_third_party_performance"], "test.dbt_demo.not_null_fact_third_party_performance_company_name": ["model.dbt_demo.fact_third_party_performance"]}, "child_map": {"model.dbt_demo.fact_third_party_performance": ["test.dbt_demo.not_null_fact_third_party_performance_company_id", "test.dbt_demo.not_null_fact_third_party_performance_company_name", "test.dbt_demo.unique_fact_third_party_performance_company_id", "test.dbt_demo.unique_fact_third_party_performance_company_name"], "model.dbt_demo.fact_catalog_returns": ["model.dbt_demo.fact_third_party_performance", "test.dbt_demo.not_null_fact_catalog_returns_cr_item_sk", "test.dbt_demo.unique_fact_catalog_returns_cr_item_sk"], "model.dbt_demo.raw_inventory_value": ["model.dbt_demo.fact_warehouse_inventory"], "model.dbt_demo.fact_warehouse_inventory": ["model.dbt_demo.fact_daily_expenses"], "model.dbt_demo.raw_inventory_item_warehouse": ["model.dbt_demo.fact_warehouse_inventory"], "model.dbt_demo.fact_daily_expenses": [], "test.dbt_demo.unique_fact_catalog_returns_cr_item_sk": [], "test.dbt_demo.not_null_fact_catalog_returns_cr_item_sk": [], "test.dbt_demo.unique_fact_third_party_performance_company_id": [], "test.dbt_demo.not_null_fact_third_party_performance_company_id": [], "test.dbt_demo.unique_fact_third_party_performance_company_name": [], "test.dbt_demo.not_null_fact_third_party_performance_company_name": []}} diff --git a/databuilder/example/scripts/sample_dbt_loader.py b/databuilder/example/scripts/sample_dbt_loader.py new file mode 100644 index 0000000000..f87415332e --- /dev/null +++ b/databuilder/example/scripts/sample_dbt_loader.py @@ -0,0 +1,178 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 + +""" +This is an example script demonstrating how to load data into Neo4j and +Elasticsearch without using an Airflow DAG. +It contains several jobs: +- `run_csv_job`: runs a job that extracts table data from a CSV, loads (writes) + this into a different local directory as a csv, then publishes this data to + neo4j. +- `run_table_column_job`: does the same thing as `run_csv_job`, but with a csv + containing column data. +- `create_last_updated_job`: creates a job that gets the current time, dumps it + into a predefined model schema, and publishes this to neo4j. +- `create_es_publisher_sample_job`: creates a job that extracts data from neo4j + and publishes it into elasticsearch. +For other available extractors, please take a look at +https://github.com/amundsen-io/amundsendatabuilder#list-of-extractors +""" + +import json +import logging +import os +import sys +import uuid + +from elasticsearch import Elasticsearch +from pyhocon import ConfigFactory +from sqlalchemy.ext.declarative import declarative_base + +from databuilder.extractor.dbt_extractor import DbtExtractor +from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor +from databuilder.job.job import DefaultJob +from databuilder.loader.file_system_elasticsearch_json_loader import FSElasticsearchJSONLoader +from databuilder.loader.file_system_neo4j_csv_loader import FsNeo4jCSVLoader +from databuilder.publisher.elasticsearch_publisher import ElasticsearchPublisher +from databuilder.publisher.neo4j_csv_publisher import Neo4jCsvPublisher +from databuilder.task.task import DefaultTask +from databuilder.transformer.base_transformer import NoopTransformer + +es_host = os.getenv('CREDENTIALS_ELASTICSEARCH_PROXY_HOST', 'localhost') +neo_host = os.getenv('CREDENTIALS_NEO4J_PROXY_HOST', 'localhost') + +es_port = os.getenv('CREDENTIALS_ELASTICSEARCH_PROXY_PORT', 9200) +neo_port = os.getenv('CREDENTIALS_NEO4J_PROXY_PORT', 7687) +if len(sys.argv) > 1: +
es_host = sys.argv[1] +if len(sys.argv) > 2: + neo_host = sys.argv[2] + +es = Elasticsearch([ + {'host': es_host, 'port': es_port}, +]) + +Base = declarative_base() + +NEO4J_ENDPOINT = f'bolt://{neo_host}:{neo_port}' + +neo4j_endpoint = NEO4J_ENDPOINT + +neo4j_user = 'neo4j' +neo4j_password = 'test' + +LOGGER = logging.getLogger(__name__) + + +def run_dbt_job(database_name, catalog_file_loc, manifest_file_loc, source_url=None): + tmp_folder = f'/var/tmp/amundsen/dbt_run' + node_files_folder = f'{tmp_folder}/nodes' + relationship_files_folder = f'{tmp_folder}/relationships' + + dbt_extractor = DbtExtractor() + csv_loader = FsNeo4jCSVLoader() + + task = DefaultTask(extractor=dbt_extractor, + loader=csv_loader, + transformer=NoopTransformer()) + + # Catalog and manifest files can be passed in as file locations or a valid python + # dict, allowing you to retrieve the files from S3 or another source and pass it in + with open(manifest_file_loc, 'rb') as f: + manifest_data = json.load(f) + + job_config = ConfigFactory.from_dict({ + 'extractor.dbt.database_name': database_name, + 'extractor.dbt.catalog_json': catalog_file_loc, # File + 'extractor.dbt.manifest_json': json.dumps(manifest_data), # JSON Dumped object + 'extractor.dbt.source_url': source_url, + 'loader.filesystem_csv_neo4j.node_dir_path': node_files_folder, + 'loader.filesystem_csv_neo4j.relationship_dir_path': relationship_files_folder, + 'loader.filesystem_csv_neo4j.delete_created_directories': True, + 'publisher.neo4j.node_files_directory': node_files_folder, + 'publisher.neo4j.relation_files_directory': relationship_files_folder, + 'publisher.neo4j.neo4j_endpoint': neo4j_endpoint, + 'publisher.neo4j.neo4j_user': neo4j_user, + 'publisher.neo4j.neo4j_password': neo4j_password, + 'publisher.neo4j.neo4j_encrypted': False, + 'publisher.neo4j.job_publish_tag': 'unique_tag', # should use unique tag here like {ds} + }) + + DefaultJob(conf=job_config, + task=task, + publisher=Neo4jCsvPublisher()).launch() + + +def
create_es_publisher_sample_job(elasticsearch_index_alias='table_search_index', + elasticsearch_doc_type_key='table', + model_name='databuilder.models.table_elasticsearch_document.TableESDocument', + entity_type='table', + elasticsearch_mapping=None): + """ + :param elasticsearch_index_alias: alias for Elasticsearch used in + amundsensearchlibrary/search_service/config.py as an index + :param elasticsearch_doc_type_key: name the ElasticSearch index is prepended with. Defaults to `table` resulting in + `table_{uuid}` + :param model_name: the Databuilder model class used in transporting between Extractor and Loader + :param entity_type: Entity type handed to the `Neo4jSearchDataExtractor` class, used to determine + Cypher query to extract data from Neo4j. Defaults to `table`. + :param elasticsearch_mapping: Elasticsearch field mapping "DDL" handed to the `ElasticsearchPublisher` class, + if None is given (default) it uses the `Table` query baked into the Publisher + """ + # loader saves data to this location and publisher reads it from here + extracted_search_data_path = '/var/tmp/amundsen/search_data.json' + + task = DefaultTask(loader=FSElasticsearchJSONLoader(), + extractor=Neo4jSearchDataExtractor(), + transformer=NoopTransformer()) + + # elastic search client instance + elasticsearch_client = es + # unique name of new index in Elasticsearch + elasticsearch_new_index_key = f'{elasticsearch_doc_type_key}_{uuid.uuid4()}' + + job_config = ConfigFactory.from_dict({ + 'extractor.search_data.entity_type': entity_type, + 'extractor.search_data.extractor.neo4j.graph_url': neo4j_endpoint, + 'extractor.search_data.extractor.neo4j.model_class': model_name, + 'extractor.search_data.extractor.neo4j.neo4j_auth_user': neo4j_user, + 'extractor.search_data.extractor.neo4j.neo4j_auth_pw': neo4j_password, + 'extractor.search_data.extractor.neo4j.neo4j_encrypted': False, + 'loader.filesystem.elasticsearch.file_path': extracted_search_data_path, + 
'loader.filesystem.elasticsearch.mode': 'w', + 'publisher.elasticsearch.file_path': extracted_search_data_path, + 'publisher.elasticsearch.mode': 'r', + 'publisher.elasticsearch.client': elasticsearch_client, + 'publisher.elasticsearch.new_index': elasticsearch_new_index_key, + 'publisher.elasticsearch.doc_type': elasticsearch_doc_type_key, + 'publisher.elasticsearch.alias': elasticsearch_index_alias, + }) + + # only optionally add these keys, so need to dynamically `put` them + if elasticsearch_mapping: + job_config.put(f'publisher.elasticsearch.{ElasticsearchPublisher.ELASTICSEARCH_MAPPING_CONFIG_KEY}', + elasticsearch_mapping) + + job = DefaultJob(conf=job_config, + task=task, + publisher=ElasticsearchPublisher()) + return job + + +if __name__ == "__main__": + # Uncomment next line to get INFO level logging + # logging.basicConfig(level=logging.INFO) + + run_dbt_job( + database_name='snowflake', + catalog_file_loc='example/sample_data/dbt/catalog.json', + manifest_file_loc='example/sample_data/dbt/manifest.json', + source_url='https://github.com/your-company/your-repo/tree/main' + ) + + job_es_table = create_es_publisher_sample_job( + elasticsearch_index_alias='table_search_index', + elasticsearch_doc_type_key='table', + entity_type='table', + model_name='databuilder.models.table_elasticsearch_document.TableESDocument') + job_es_table.launch() diff --git a/databuilder/requirements.txt b/databuilder/requirements.txt index d2a577c693..205e9afa6b 100644 --- a/databuilder/requirements.txt +++ b/databuilder/requirements.txt @@ -42,7 +42,7 @@ elasticsearch>=6.2.0,<7.0 atomicwrites==1.1.5 more-itertools==4.2.0 pluggy>=0.6.0 -py==1.5.3 +py==1.10.0 pyhocon==0.3.42 pyparsing==2.2.0 sqlalchemy>=1.3.6,<1.4 diff --git a/databuilder/setup.py b/databuilder/setup.py index 4e238d7246..4daedb27d4 100644 --- a/databuilder/setup.py +++ b/databuilder/setup.py @@ -4,7 +4,7 @@ from setuptools import find_packages, setup -__version__ = '4.3.1' +__version__ = '4.4.0' requirements = [ @@ 
-81,7 +81,7 @@ ] rds = [ - 'sqlalchemy>=1.3.6,<1.4' + 'sqlalchemy>=1.3.6,<1.4', 'mysqlclient>=1.3.6,<3' ] diff --git a/databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py deleted file mode 100644 index f3145d75b3..0000000000 --- a/databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright Contributors to the Amundsen project. -# SPDX-License-Identifier: Apache-2.0 diff --git a/databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_charts_batch_extractor.py similarity index 55% rename from databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py rename to databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_charts_batch_extractor.py index 5e11c663ff..73bdbfaec7 100644 --- a/databuilder/tests/unit/extractor/dashboard/mode_analytics/batch/test_mode_dashboard_charts_batch_extractor.py +++ b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_charts_batch_extractor.py @@ -7,17 +7,16 @@ from pyhocon import ConfigFactory from databuilder import Scoped -from databuilder.extractor.dashboard.mode_analytics.batch.mode_dashboard_charts_batch_extractor import ( +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_charts_batch_extractor import ( ModeDashboardChartsBatchExtractor, ) +from databuilder.models.dashboard.dashboard_chart import DashboardChart class TestModeDashboardChartsBatchExtractor(unittest.TestCase): def setUp(self) -> None: config = ConfigFactory.from_dict({ 'extractor.mode_dashboard_chart_batch.organization': 'amundsen', - 'extractor.mode_dashboard_chart_batch.mode_user_token': 'amundsen_user_token', - 
'extractor.mode_dashboard_chart_batch.mode_password_token': 'amundsen_password_token', 'extractor.mode_dashboard_chart_batch.mode_bearer_token': 'amundsen_bearer_token', }) self.config = config @@ -26,7 +25,8 @@ def test_dashboard_chart_extractor_empty_record(self) -> None: extractor = ModeDashboardChartsBatchExtractor() extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) - with patch('databuilder.rest_api.rest_api_query.requests.get'): + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = {'charts': []} record = extractor.extract() self.assertIsNone(record) @@ -34,26 +34,30 @@ def test_dashboard_chart_extractor_actual_record(self) -> None: extractor = ModeDashboardChartsBatchExtractor() extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) - with patch('databuilder.extractor.restapi.rest_api_extractor.RestAPIExtractor.extract') as mock_get: - mock_get.return_value = { - 'organization': 'amundsen', - 'is_active': None, - 'updated_at': None, - 'do_not_update_empty_attribute': True, - 'dashboard_group_id': 'ggg', - 'dashboard_id': 'ddd', - 'query_id': 'yyy', - 'chart_id': 'xxx', - 'chart_name': 'some chart', - 'chart_type': 'bigNumber', - 'product': 'mode' + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = { + 'charts': [ + { + 'space_token': 'ggg', + 'report_token': 'ddd', + 'query_token': 'yyy', + 'token': 'xxx', + 'chart_title': 'some chart', + 'chart_type': 'bigNumber' + } + ] } record = extractor.extract() + self.assertIsInstance(record, DashboardChart) self.assertEqual(record._dashboard_group_id, 'ggg') self.assertEqual(record._dashboard_id, 'ddd') + self.assertEqual(record._query_id, 'yyy') + self.assertEqual(record._chart_id, 'xxx') self.assertEqual(record._chart_name, 'some chart') + 
self.assertEqual(record._chart_type, 'bigNumber') self.assertEqual(record._product, 'mode') + self.assertEqual(record._cluster, 'gold') if __name__ == '__main__': diff --git a/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_extractor.py new file mode 100644 index 0000000000..a55d98fa97 --- /dev/null +++ b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_extractor.py @@ -0,0 +1,63 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_extractor import ModeDashboardExtractor +from databuilder.models.dashboard.dashboard_metadata import DashboardMetadata + + +class TestModeDashboardExtractor(unittest.TestCase): + def setUp(self) -> None: + config = ConfigFactory.from_dict({ + 'extractor.mode_dashboard.organization': 'amundsen', + 'extractor.mode_dashboard.mode_bearer_token': 'amundsen_bearer_token', + }) + self.config = config + + def test_extractor_extract_record(self) -> None: + extractor = ModeDashboardExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) + + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request, \ + patch('databuilder.rest_api.query_merger.QueryMerger._compute_query_result') as mock_query_result: + mock_request.return_value.json.return_value = { + 'reports': [ + { + 'token': 'ddd', + 'name': 'dashboard name', + 'description': 'dashboard description', + 'created_at': '2021-02-05T21:20:09.019Z', + 'space_token': 'ggg', + } + ] + } + mock_query_result.return_value = { + 'ggg': { + 'dashboard_group_id': 'ggg', + 'dashboard_group': 'dashboard group name', + 'dashboard_group_description': 'dashboard group 
description', + } + } + + record = next(extractor.extract()) + self.assertIsInstance(record, DashboardMetadata) + self.assertEqual(record.dashboard_group, 'dashboard group name') + self.assertEqual(record.dashboard_name, 'dashboard name') + self.assertEqual(record.description, 'dashboard description') + self.assertEqual(record.cluster, 'gold') + self.assertEqual(record.product, 'mode') + self.assertEqual(record.dashboard_group_id, 'ggg') + self.assertEqual(record.dashboard_id, 'ddd') + self.assertEqual(record.dashboard_group_description, 'dashboard group description') + self.assertEqual(record.created_timestamp, 1612560009) + self.assertEqual(record.dashboard_group_url, 'https://app.mode.com/amundsen/spaces/ggg') + self.assertEqual(record.dashboard_url, 'https://app.mode.com/amundsen/reports/ddd') + + +if __name__ == '__main__': + unittest.main() diff --git a/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_modified_timestamp_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_modified_timestamp_extractor.py new file mode 100644 index 0000000000..a0574bb622 --- /dev/null +++ b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_modified_timestamp_extractor.py @@ -0,0 +1,49 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_last_modified_timestamp_extractor import ( + ModeDashboardLastModifiedTimestampExtractor, +) +from databuilder.models.dashboard.dashboard_last_modified import DashboardLastModifiedTimestamp + + +class TestModeDashboardLastModifiedTimestampExtractor(unittest.TestCase): + def setUp(self) -> None: + config = ConfigFactory.from_dict({ + 'extractor.mode_dashboard_last_modified_timestamp_execution.organization': 'amundsen', + 'extractor.mode_dashboard_last_modified_timestamp_execution.mode_bearer_token': 'amundsen_bearer_token', + }) + self.config = config + + def test_extractor_extract_record(self) -> None: + extractor = ModeDashboardLastModifiedTimestampExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) + + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = { + 'reports': [ + { + 'space_token': 'ggg', + 'token': 'ddd', + 'edited_at': '2021-02-05T21:20:09.019Z', + } + ] + } + + record = next(extractor.extract()) + self.assertIsInstance(record, DashboardLastModifiedTimestamp) + self.assertEqual(record._dashboard_group_id, 'ggg') + self.assertEqual(record._dashboard_id, 'ddd') + self.assertEqual(record._last_modified_timestamp, 1612560009) + self.assertEqual(record._product, 'mode') + self.assertEqual(record._cluster, 'gold') + + +if __name__ == '__main__': + unittest.main() diff --git a/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_successful_executions_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_successful_executions_extractor.py new file mode 100644 index 0000000000..29bf029938 --- /dev/null +++ 
b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_last_successful_executions_extractor.py @@ -0,0 +1,51 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_last_successful_executions_extractor import ( + ModeDashboardLastSuccessfulExecutionExtractor, +) +from databuilder.models.dashboard.dashboard_execution import DashboardExecution + + +class TestModeDashboardLastModifiedTimestampExtractor(unittest.TestCase): + def setUp(self) -> None: + config = ConfigFactory.from_dict({ + 'extractor.mode_dashboard_last_successful_execution.organization': 'amundsen', + 'extractor.mode_dashboard_last_successful_execution.mode_bearer_token': 'amundsen_bearer_token', + }) + self.config = config + + def test_extractor_extract_record(self) -> None: + extractor = ModeDashboardLastSuccessfulExecutionExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) + + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = { + 'reports': [ + { + 'space_token': 'ggg', + 'token': 'ddd', + 'last_successfully_run_at': '2021-02-05T21:20:09.019Z', + } + ] + } + + record = next(extractor.extract()) + self.assertIsInstance(record, DashboardExecution) + self.assertEqual(record._dashboard_group_id, 'ggg') + self.assertEqual(record._dashboard_id, 'ddd') + self.assertEqual(record._execution_timestamp, 1612560009) + self.assertEqual(record._execution_state, 'succeeded') + self.assertEqual(record._product, 'mode') + self.assertEqual(record._cluster, 'gold') + self.assertEqual(record._execution_id, '_last_successful_execution') + + +if __name__ == '__main__': + unittest.main() diff --git 
a/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_owner_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_owner_extractor.py new file mode 100644 index 0000000000..5efbf259df --- /dev/null +++ b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_owner_extractor.py @@ -0,0 +1,47 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_owner_extractor import ModeDashboardOwnerExtractor +from databuilder.models.dashboard.dashboard_owner import DashboardOwner + + +class TestModeDashboardLastModifiedTimestampExtractor(unittest.TestCase): + def setUp(self) -> None: + config = ConfigFactory.from_dict({ + 'extractor.mode_dashboard_owner.organization': 'amundsen', + 'extractor.mode_dashboard_owner.mode_bearer_token': 'amundsen_bearer_token', + }) + self.config = config + + def test_extractor_extract_record(self) -> None: + extractor = ModeDashboardOwnerExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) + + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = { + 'reports': [ + { + 'space_token': 'ggg', + 'token': 'ddd', + 'creator_email': 'amundsen@abc.com', + } + ] + } + + record = extractor.extract() + self.assertIsInstance(record, DashboardOwner) + self.assertEqual(record._dashboard_group_id, 'ggg') + self.assertEqual(record._dashboard_id, 'ddd') + self.assertEqual(record._email, 'amundsen@abc.com') + self.assertEqual(record._product, 'mode') + self.assertEqual(record._cluster, 'gold') + + +if __name__ == '__main__': + unittest.main() diff --git 
a/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_queries_extractor.py b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_queries_extractor.py new file mode 100644 index 0000000000..cf906ee5b7 --- /dev/null +++ b/databuilder/tests/unit/extractor/dashboard/mode_analytics/test_mode_dashboard_queries_extractor.py @@ -0,0 +1,54 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dashboard.mode_analytics.mode_dashboard_queries_extractor import ( + ModeDashboardQueriesExtractor, +) +from databuilder.models.dashboard.dashboard_query import DashboardQuery + + +class TestModeDashboardLastModifiedTimestampExtractor(unittest.TestCase): + def setUp(self) -> None: + config = ConfigFactory.from_dict({ + 'extractor.mode_dashboard_query.organization': 'amundsen', + 'extractor.mode_dashboard_query.mode_bearer_token': 'amundsen_bearer_token', + }) + self.config = config + + def test_extractor_extract_record(self) -> None: + extractor = ModeDashboardQueriesExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.config, scope=extractor.get_scope())) + + with patch('databuilder.rest_api.rest_api_query.RestApiQuery._send_request') as mock_request: + mock_request.return_value.json.return_value = { + 'queries': [ + { + 'space_token': 'ggg', + 'report_token': 'ddd', + 'token': 'qqq', + 'name': 'this query name', + 'raw_query': 'select 1', + } + ] + } + + record = next(extractor.extract()) + self.assertIsInstance(record, DashboardQuery) + self.assertEqual(record._dashboard_group_id, 'ggg') + self.assertEqual(record._dashboard_id, 'ddd') + self.assertEqual(record._query_id, 'qqq') + self.assertEqual(record._query_name, 'this query name') + self.assertEqual(record._query_text, 'select 1') + self.assertEqual(record._url, 
'https://app.mode.com/amundsen/reports/ddd/queries/qqq') + self.assertEqual(record._product, 'mode') + self.assertEqual(record._cluster, 'gold') + + +if __name__ == '__main__': + unittest.main() diff --git a/databuilder/tests/unit/extractor/test_dbt_extractor.py b/databuilder/tests/unit/extractor/test_dbt_extractor.py new file mode 100644 index 0000000000..3da9f240a5 --- /dev/null +++ b/databuilder/tests/unit/extractor/test_dbt_extractor.py @@ -0,0 +1,355 @@ +# Copyright Contributors to the Amundsen project. +# SPDX-License-Identifier: Apache-2.0 + +import json +import unittest +from typing import ( + Any, Optional, Union, no_type_check, +) + +import pyhocon +import pytest +from pyhocon import ConfigFactory + +from databuilder import Scoped +from databuilder.extractor.dbt_extractor import DbtExtractor, InvalidDbtInputs +from databuilder.models.badge import Badge, BadgeMetadata +from databuilder.models.table_lineage import TableLineage +from databuilder.models.table_metadata import TableMetadata +from databuilder.models.table_source import TableSource + + +def _extract_until_not_these(extractor: DbtExtractor, + classes: Any) -> Optional[Union[BadgeMetadata, TableLineage, TableMetadata, TableSource]]: + # Move to the next type of extracted class: + r = extractor.extract() + while isinstance(r, tuple(classes)): + r = extractor.extract() + return r + + +class TestCsvExtractor(unittest.TestCase): + + database_name = 'snowflake' + catalog_file_loc = 'example/sample_data/dbt/catalog.json' + manifest_data = 'example/sample_data/dbt/manifest.json' + source_url = 'test_url' + + @no_type_check + def test_extraction_with_model_class(self) -> None: + """ + Test Extraction using model class + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name, + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.SOURCE_URL}': 
self.source_url + } + self.conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=self.conf, + scope=extractor.get_scope())) + + # One block of tests for each type of model created + extracted_classes = [] + + result = extractor.extract() + self.assertTrue(isinstance(result, TableMetadata)) + self.assertEqual(result.name, 'fact_third_party_performance') + self.assertEqual(result.description.text, 'The performance for third party vendors loss rate by day.') + self.assertEqual(result.database, self.database_name) + self.assertEqual(result.cluster, 'dbt_demo') + self.assertEqual(result.schema, 'public') + self.assertEqual(result.tags, []) + self.assertEqual(result.is_view, True) + extracted_classes.append(TableMetadata) + + result2 = _extract_until_not_these(extractor, extracted_classes) + self.assertTrue(isinstance(result2, TableSource)) + self.assertEqual(result2.db, self.database_name) + self.assertEqual(result2.cluster, 'dbt_demo') + self.assertEqual(result2.schema, 'public') + self.assertEqual(result2.table, 'fact_third_party_performance') + self.assertEqual(result2.source, 'test_url/models/call_center/fact_third_party_performance.sql') + extracted_classes.append(TableSource) + + result3 = _extract_until_not_these(extractor, extracted_classes) + self.assertTrue(isinstance(result3, BadgeMetadata)) + self.assertEqual(result3.badges, [Badge('finance', 'table'), Badge('certified', 'table')]) + extracted_classes.append(BadgeMetadata) + + result4 = _extract_until_not_these(extractor, extracted_classes) + self.assertTrue(isinstance(result4, TableLineage)) + self.assertEqual(result4.table_key, 'snowflake://dbt_demo.public/fact_catalog_returns') + self.assertEqual(result4.downstream_deps, ['snowflake://dbt_demo.public/fact_third_party_performance']) + extracted_classes.append(TableLineage) + + # Should not be any other unique models created + result5 = _extract_until_not_these(extractor, extracted_classes) + 
self.assertEqual(result5, None) + + @no_type_check + def test_dbt_file_inputs_as_json_dumps(self) -> None: + """ + Tests to ensure that the same content can be extracted when the manifest.json + and catalog.json are provided as a file location or as a json.dumps() object + """ + config_dict_1 = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name, + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.SOURCE_URL}': self.source_url + } + conf_1 = ConfigFactory.from_dict(config_dict_1) + extractor_1 = DbtExtractor() + extractor_1.init(Scoped.get_scoped_conf(conf=conf_1, scope=extractor_1.get_scope())) + + with open(self.catalog_file_loc, 'r') as f: + catalog_as_json = json.dumps(json.load(f)) + + with open(self.manifest_data, 'r') as f: + manifest_as_json = json.dumps(json.load(f)) + + config_dict_2 = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name, + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': catalog_as_json, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': manifest_as_json + } + conf_2 = ConfigFactory.from_dict(config_dict_2) + extractor_2 = DbtExtractor() + extractor_2.init(Scoped.get_scoped_conf(conf=conf_2, scope=extractor_2.get_scope())) + + result_1 = extractor_1.extract() + result_2 = extractor_2.extract() + self.assertEqual(result_1.name, result_2.name) + self.assertEqual(result_1.description.text, result_2.description.text) + self.assertEqual(result_1.database, result_2.database) + self.assertEqual(result_1.cluster, result_2.cluster) + self.assertEqual(result_1.schema, result_2.schema) + self.assertEqual(result_1.tags, result_2.tags) + self.assertEqual(result_1.is_view, result_2.is_view) + + @no_type_check + def test_keys_retain_original_format(self) -> None: + """ + Test that the database name, cluster name, schema and table name do not + have lowercase auto applied. 
+ """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), # Force upper for test + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.FORCE_TABLE_KEY_LOWER}': False + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + result = extractor.extract() + + self.assertEqual(result.name, 'fact_third_party_performance') + self.assertEqual(result.database, 'SNOWFLAKE') + self.assertEqual(result.cluster, 'DBT_DEMO') + self.assertEqual(result.schema, 'PUBLIC') + + def test_do_not_extract_tables(self) -> None: + """ + Test that tables are not extracted. + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.EXTRACT_TABLES}': False + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + has_next = True + while has_next: + extraction = extractor.extract() + self.assertFalse(isinstance(extraction, TableMetadata)) + if extraction is None: + break + + def test_do_not_extract_descriptions(self) -> None: + """ + Test that tables are not extracted. 
+ """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.EXTRACT_DESCRIPTIONS}': False + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + has_next = True + while has_next: + extraction = extractor.extract() + if isinstance(extraction, TableMetadata): + # No table descriptions + self.assertEqual(extraction.description, None) + + # No column descriptions + for col in extraction.columns: + self.assertEqual(col.description, None) + + if extraction is None: + break + + def test_do_not_extract_dbt_tags(self) -> None: + """ + Test that tags are not extracted as Badges + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.EXTRACT_TAGS}': False + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + has_next = True + while has_next: + extraction = extractor.extract() + self.assertFalse(isinstance(extraction, BadgeMetadata)) + if extraction is None: + break + + def test_import_tags_as_tags(self) -> None: + """ + Test that dbt tags can be configured to be imported as Amundsen tags. 
+ """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.IMPORT_TAGS_AS}': 'tag' + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + # The 7th table has tags + extraction = [extractor.extract() for i in range(6)][-1] + self.assertEqual(extraction.tags, ['finance', 'certified']) # type: ignore + + def test_do_not_extract_dbt_lineage(self) -> None: + """ + Test that table level lineage is not extracted from dbt + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.EXTRACT_LINEAGE}': False + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + has_next = True + while has_next: + extraction = extractor.extract() + self.assertFalse(isinstance(extraction, TableLineage)) + if extraction is None: + break + + def test_alias_for_table_name(self) -> None: + """ + Test that table level lineage is not extracted from dbt + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.MODEL_NAME_KEY}': 'alias' + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, + scope=extractor.get_scope())) + + result = extractor.extract() + known_alias = 'cost_summary' 
# One table aliased as "cost_summary" + known_alias_cnt = 0 + while result: + if isinstance(result, TableMetadata): + self.assertNotEqual(result.name, 'fact_daily_expenses') + if result.name == known_alias: + known_alias_cnt += 1 + result = extractor.extract() + self.assertEqual(known_alias_cnt, 1) + + def test_filter_schema_name(self) -> None: + """ + Test that table level lineage is not extracted from dbt + """ + config_dict = { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data, + f'extractor.dbt.{DbtExtractor.EXTRACT_LINEAGE}': False, + f'extractor.dbt.{DbtExtractor.SCHEMA_FILTER}': 'other_schema_value' + } + conf = ConfigFactory.from_dict(config_dict) + extractor = DbtExtractor() + extractor.init(Scoped.get_scoped_conf(conf=conf, + scope=extractor.get_scope())) + + # Tests currently have 1 schema defined + result = extractor.extract() + self.assertEqual(result, None) + + def test_invalid_dbt_inputs(self) -> None: + """ + Test that table level lineage is not extracted from dbt + """ + missing_inputs = [ + { + # f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data + }, + { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + # f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data + }, + { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + # f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data + } + ] + for missing_input_config in missing_inputs: + conf = ConfigFactory.from_dict(missing_input_config) + extractor = 
DbtExtractor() + with pytest.raises(pyhocon.exceptions.ConfigMissingException): + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) + + # Invalid manifest.json and invalid catalog.json + invalid_file_jsons = [ + { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': 'not a real file location or json', + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': self.manifest_data + }, + { + f'extractor.dbt.{DbtExtractor.DATABASE_NAME}': self.database_name.upper(), + f'extractor.dbt.{DbtExtractor.CATALOG_JSON}': self.catalog_file_loc, + f'extractor.dbt.{DbtExtractor.MANIFEST_JSON}': 'not a real file location or json' + } + ] + for invalid_conf in invalid_file_jsons: + conf = ConfigFactory.from_dict(invalid_conf) + extractor = DbtExtractor() + with pytest.raises(InvalidDbtInputs): + extractor.init(Scoped.get_scoped_conf(conf=conf, scope=extractor.get_scope())) diff --git a/databuilder/tests/unit/rest_api/test_query_merger.py b/databuilder/tests/unit/rest_api/test_query_merger.py new file mode 100644 index 0000000000..81b892745d --- /dev/null +++ b/databuilder/tests/unit/rest_api/test_query_merger.py @@ -0,0 +1,99 @@ +# Copyright Contributors to the Amundsen project. 
+# SPDX-License-Identifier: Apache-2.0 + +import unittest + +from mock import patch + +from databuilder.rest_api.base_rest_api_query import RestApiQuerySeed +from databuilder.rest_api.query_merger import QueryMerger +from databuilder.rest_api.rest_api_query import RestApiQuery + + +class TestQueryMerger(unittest.TestCase): + def setUp(self) -> None: + query_to_join_seed_record = [ + {'foo1': 'bar1', 'dashboard_id': 'd1'}, + {'foo2': 'bar2', 'dashboard_id': 'd3'} + ] + self.query_to_join = RestApiQuerySeed(seed_record=query_to_join_seed_record) + self.json_path = 'foo.name' + self.field_names = ['name_field'] + self.url = 'foobar' + + def test_ensure_record_get_updated(self) -> None: + query_to_merge_seed_record = [ + {'organization': 'amundsen', 'dashboard_id': 'd1'}, + {'organization': 'amundsen-databuilder', 'dashboard_id': 'd2'}, + {'organization': 'amundsen-dashboard', 'dashboard_id': 'd3'}, + ] + query_to_merge = RestApiQuerySeed(seed_record=query_to_merge_seed_record) + query_merger = QueryMerger(query_to_merge=query_to_merge, merge_key='dashboard_id') + + with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get: + mock_get.return_value.json.side_effect = [ + {'foo': {'name': 'john'}}, + {'foo': {'name': 'doe'}}, + ] + query = RestApiQuery(query_to_join=self.query_to_join, url=self.url, params={}, + json_path=self.json_path, field_names=self.field_names, + query_merger=query_merger) + results = list(query.execute()) + self.assertEqual(len(results), 2) + self.assertDictEqual( + {'dashboard_id': 'd1', 'foo1': 'bar1', 'name_field': 'john', 'organization': 'amundsen'}, + results[0], + ) + self.assertDictEqual( + {'dashboard_id': 'd3', 'foo2': 'bar2', 'name_field': 'doe', 'organization': 'amundsen-dashboard'}, + results[1], + ) + + def test_exception_rasied_with_duplicate_merge_key(self) -> None: + """ + Two records in query_to_merge results have {'dashboard_id': 'd2'}, + exception should be raised + """ + query_to_merge_seed_record = [ + 
{'organization': 'amundsen', 'dashboard_id': 'd1'}, + {'organization': 'amundsen-databuilder', 'dashboard_id': 'd2'}, + {'organization': 'amundsen-dashboard', 'dashboard_id': 'd2'}, + ] + query_to_merge = RestApiQuerySeed(seed_record=query_to_merge_seed_record) + query_merger = QueryMerger(query_to_merge=query_to_merge, merge_key='dashboard_id') + + with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get: + mock_get.return_value.json.side_effect = [ + {'foo': {'name': 'john'}}, + {'foo': {'name': 'doe'}}, + ] + query = RestApiQuery(query_to_join=self.query_to_join, url=self.url, params={}, + json_path=self.json_path, field_names=self.field_names, + query_merger=query_merger) + self.assertRaises(Exception, query.execute()) + + def test_exception_raised_with_missing_merge_key(self) -> None: + """ + No record in query_to_merge results contains {'dashboard_id': 'd3'}, + exception should be raised + """ + query_to_merge_seed_record = [ + {'organization': 'amundsen', 'dashboard_id': 'd1'}, + {'organization': 'amundsen-databuilder', 'dashboard_id': 'd2'}, + ] + query_to_merge = RestApiQuerySeed(seed_record=query_to_merge_seed_record) + query_merger = QueryMerger(query_to_merge=query_to_merge, merge_key='dashboard_id') + + with patch('databuilder.rest_api.rest_api_query.requests.get') as mock_get: + mock_get.return_value.json.side_effect = [ + {'foo': {'name': 'john'}}, + {'foo': {'name': 'doe'}}, + ] + query = RestApiQuery(query_to_join=self.query_to_join, url=self.url, params={}, + json_path=self.json_path, field_names=self.field_names, + query_merger=query_merger) + self.assertRaises(Exception, query.execute()) + + +if __name__ == '__main__': + unittest.main() diff --git a/docs/common b/docs/common index 3ffe4e016f..60d3b0a6a8 120000 --- a/docs/common +++ b/docs/common @@ -1 +1 @@ -../amundsencommon \ No newline at end of file +../common \ No newline at end of file diff --git a/docs/databuilder b/docs/databuilder index 2a7c9cfb59..1df4f0fd95 120000 
--- a/docs/databuilder +++ b/docs/databuilder @@ -1 +1 @@ -../amundsendatabuilder \ No newline at end of file +../databuilder \ No newline at end of file diff --git a/docs/faq.md b/docs/faq.md index 3f5dc1136a..7cbb12e8c0 100644 --- a/docs/faq.md +++ b/docs/faq.md @@ -3,19 +3,21 @@ ## How to select between Neo4j and Atlas as backend for Amundsen? ### Why Neo4j? -1. Amundsen has direct influence over the data model if you use neo4j. This, at least initially, will benefit the speed by which new features in amundsen can arrive -2. Atlas is developed with data governance in mind and not with data discovery. You could view "slapping amundsen on top of Atlas" as a kind of Frankenstein: never able to properly able to cater to your audience -3. Atlas seems to have a slow development cycle and it's community is not very responsive although some small improvements have been made -4. Atlas has the "Hadoop" era "smell" which isn't considered very sexy nowadays -5. Neo4j for it is the market leader in Graph database and also was proven by Airbnb’s Data portal on their Data discovery tool. +1. Amundsen has direct influence over the data model if you use neo4j. This, at least initially, will benefit the speed by which new features in amundsen can arrive. +2. Neo4j for it is the market leader in Graph database and also was proven by Airbnb’s Data portal on their Data discovery tool. ### Why Atlas? 1. Atlas has lineage support already available. It's been tried and tested. -2. Tag propagation is supported -3. It has a robust authentication and authorization system -4. Atlas does data governance adding amundsen for discovery makes it best of both worlds -5. It has support for push based due to its many plugins -6. The free version of Neo4j does not have authorization support(Enterprise version does). Your question should actually be why use "neo4j over janusgraph" cause that is the right level of comparison. Atlas adds a whole bunch on top of the graph database. +2. 
Tag/Badge propagation is supported. +3. It has a robust authentication and authorization system. +4. Atlas does data governance adding amundsen for discovery makes it best of both worlds. +5. It has support for push based due to its many plugins. +6. The free version of Neo4j does not have authorization support (Enterprise version does). Your question should actually be why use "neo4j over janusgraph" cause that is the right level of comparison. Atlas adds a whole bunch on top of the graph database. + +#### Why not Atlas? +1. Atlas is developed with data governance in mind and not with data discovery. +2. Atlas seems to have a slow development cycle and it's community is not very responsive although some small improvements have been made. +3. Amundsen databuilder integration is not yet supported which puts more strain on the end user to populate the required entities. ## What are the prerequisites to use Apache Atlas as backend for Amundsen? To run Amundsen with Atlas, latest versions of following components should be used: @@ -55,4 +57,4 @@ To put a proxy in place to bypass any adblockers and capture all analytics, foll 1. Follow https://github.com/ZitRos/save-analytics-from-content-blockers#setup to set up your own proxy server. 2. In the same repository, run `npm run mask www.googletagmanager.com/gtag/js?id=UA-XXXXXXXXX` and save the output. 3. In your custom frontend, override https://github.com/amundsen-io/amundsenfrontendlibrary/blob/master/amundsen_application/static/templates/fragments/google-analytics-loader.html#L6 to -4. Now, note that network requests to www.googletagmanager.com will be sent from behind your masked proxy endpoint, saving your analytics from content blockers! \ No newline at end of file +4. Now, note that network requests to www.googletagmanager.com will be sent from behind your masked proxy endpoint, saving your analytics from content blockers! 
diff --git a/docs/frontend b/docs/frontend index 2dea3d0f0d..af288785f3 120000 --- a/docs/frontend +++ b/docs/frontend @@ -1 +1 @@ -../amundsenfrontendlibrary \ No newline at end of file +../frontend \ No newline at end of file diff --git a/docs/metadata b/docs/metadata index e1ee280544..5e99e5ae80 120000 --- a/docs/metadata +++ b/docs/metadata @@ -1 +1 @@ -../amundsenmetadatalibrary \ No newline at end of file +../metadata \ No newline at end of file diff --git a/docs/search b/docs/search index 26d0b91592..15a4dc32da 120000 --- a/docs/search +++ b/docs/search @@ -1 +1 @@ -../amundsensearchlibrary \ No newline at end of file +../search \ No newline at end of file diff --git a/frontend/amundsen_application/api/metadata/v0.py b/frontend/amundsen_application/api/metadata/v0.py index b6efa49d29..9ccc0338b7 100644 --- a/frontend/amundsen_application/api/metadata/v0.py +++ b/frontend/amundsen_application/api/metadata/v0.py @@ -800,7 +800,9 @@ def get_table_lineage() -> Response: try: table_endpoint = _get_table_endpoint() table_key = get_query_param(request.args, 'key') - url = f'{table_endpoint}/{table_key}/lineage' + depth = get_query_param(request.args, 'depth') + direction = get_query_param(request.args, 'direction') + url = f'{table_endpoint}/{table_key}/lineage?depth={depth}&direction={direction}' response = request_metadata(url=url, method=request.method) json = response.json() downstream = [marshall_lineage_table(table) for table in json.get('downstream_entities')] diff --git a/frontend/amundsen_application/static/js/config/config-default.ts b/frontend/amundsen_application/static/js/config/config-default.ts index 833f6dc739..bfe5b71ded 100644 --- a/frontend/amundsen_application/static/js/config/config-default.ts +++ b/frontend/amundsen_application/static/js/config/config-default.ts @@ -201,9 +201,11 @@ const configDefault: AppConfig = { }, }, tableLineage: { + inAppListEnabled: false, + inAppPageEnabled: false, + externalEnabled: false, iconPath: 'PATH_TO_ICON', 
isBeta: false, - isEnabled: false, urlGenerator: ( database: string, cluster: string, @@ -211,10 +213,10 @@ const configDefault: AppConfig = { table: string ) => `https://DEFAULT_LINEAGE_URL?schema=${schema}&cluster=${cluster}&db=${database}&table=${table}`, - inAppListEnabled: false, }, columnLineage: { inAppListEnabled: false, + inAppPageEnabled: false, urlGenerator: ( database: string, cluster: string, diff --git a/frontend/amundsen_application/static/js/config/config-types.ts b/frontend/amundsen_application/static/js/config/config-types.ts index 72a1eeb18c..1d091f63ae 100644 --- a/frontend/amundsen_application/static/js/config/config-types.ts +++ b/frontend/amundsen_application/static/js/config/config-types.ts @@ -281,14 +281,15 @@ interface TableProfileConfig { interface TableLineageConfig { iconPath: string; isBeta: boolean; - isEnabled: boolean; urlGenerator: ( database: string, cluster: string, schema: string, table: string ) => string; + externalEnabled: boolean; inAppListEnabled: boolean; + inAppPageEnabled: boolean; } /** @@ -298,6 +299,7 @@ interface TableLineageConfig { */ interface ColumnLineageConfig { inAppListEnabled: boolean; + inAppPageEnabled: boolean; urlGenerator: ( database: string, cluster: string, diff --git a/frontend/amundsen_application/static/js/config/config-utils.ts b/frontend/amundsen_application/static/js/config/config-utils.ts index fab88e6d45..4bc79f5cc4 100644 --- a/frontend/amundsen_application/static/js/config/config-utils.ts +++ b/frontend/amundsen_application/static/js/config/config-utils.ts @@ -331,6 +331,20 @@ export function isColumnListLineageEnabled() { return AppConfig.columnLineage.inAppListEnabled; } +/** + * Returns whether the in-app table lineage page is enabled. + */ +export function isTableLineagePageEnabled() { + return AppConfig.tableLineage.inAppPageEnabled; +} + +/** + * Returns whether the in-app column lineage page is enabled. 
+ */ +export function isColumnLineagePageEnabled() { + return AppConfig.columnLineage.inAppPageEnabled; +} + /** * Returns the lineage link for a given column */ diff --git a/frontend/amundsen_application/static/js/config/index.spec.ts b/frontend/amundsen_application/static/js/config/index.spec.ts index 1802f70274..f2595edaa0 100644 --- a/frontend/amundsen_application/static/js/config/index.spec.ts +++ b/frontend/amundsen_application/static/js/config/index.spec.ts @@ -423,6 +423,22 @@ describe('isColumnListLineageEnabled', () => { }); }); +describe('isTableLineagePageEnabled', () => { + it('returns isTableLineagePageEnabled defined in config', () => { + const actual = ConfigUtils.isTableLineagePageEnabled(); + const expected = AppConfig.tableLineage.inAppPageEnabled; + expect(actual).toBe(expected); + }); +}); + +describe('isColumnLineagePageEnabled', () => { + it('returns isColumnLineagePageEnabled defined in config', () => { + const actual = ConfigUtils.isColumnLineagePageEnabled(); + const expected = AppConfig.columnLineage.inAppPageEnabled; + expect(actual).toBe(expected); + }); +}); + describe('getColumnLineageLink', () => { it('calls the column lineage link with the right params', () => { const tableData = { diff --git a/frontend/amundsen_application/static/js/ducks/lineage/api/api.spec.ts b/frontend/amundsen_application/static/js/ducks/lineage/api/api.spec.ts new file mode 100644 index 0000000000..cd6f092ff6 --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/api/api.spec.ts @@ -0,0 +1,58 @@ +import axios, { AxiosResponse } from 'axios'; + +import { tableLineage } from 'fixtures/metadata/table'; + +import * as API from './v0'; + +jest.mock('axios'); + +describe('getLineage', () => { + let axiosMockGet; + it('resolves with object containing table lineage and status code', async () => { + const mockStatus = 200; + const mockResponse = { + data: tableLineage, + status: mockStatus, + }; + axiosMockGet = jest + .spyOn(axios, 'get') + 
.mockImplementationOnce(() => Promise.resolve(mockResponse)); + expect.assertions(2); + await API.getTableLineage( + 'database://cluster.schema/table_name', + 1, + 'both' + ).then((processedResponse) => { + expect(processedResponse).toEqual({ + data: tableLineage, + statusCode: mockStatus, + }); + }); + expect(axiosMockGet).toHaveBeenCalled(); + }); + + it('catches error and resolves with object containing error information', async () => { + const mockStatus = 500; + const mockMessage = 'oops'; + const mockResponse = { + response: { + data: { + msg: mockMessage, + }, + status: mockStatus, + }, + }; + axiosMockGet = jest + .spyOn(axios, 'get') + .mockImplementationOnce(() => Promise.reject(mockResponse)); + expect.assertions(2); + await API.getTableLineage('testUri', 1, 'both').catch( + (processedResponse) => { + expect(processedResponse).toEqual({ + status: mockStatus, + }); + } + ); + expect(axiosMockGet).toHaveBeenCalled(); + }); +}); diff --git a/frontend/amundsen_application/static/js/ducks/lineage/api/v0.ts b/frontend/amundsen_application/static/js/ducks/lineage/api/v0.ts new file mode 100644 index 0000000000..d48cbc8f50 --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/api/v0.ts @@ -0,0 +1,47 @@ +import axios, { AxiosError, AxiosResponse } from 'axios'; +import { Lineage } from 'interfaces/Lineage'; +import { getQueryParams } from 'ducks/utilMethods'; + +export const API_PATH = '/api/metadata/v0'; + +export type LineageAPI = { lineage: Lineage }; + +export function getTableLineage(key: string, depth: number, direction: string) { + const tableQueryParams = getQueryParams({ key, depth, direction }); + return axios + .get(`${API_PATH}/get_table_lineage?${tableQueryParams}`) + .then((response: AxiosResponse) => ({ + data: response.data, + statusCode: response.status, + })) + .catch((e: AxiosError) => { + const { response } = e; + const status = response ? 
response.status : null; + return Promise.reject({ status }); + }); +} + +export function getColumnLineage( + key: string, + columnName: string, + depth: number, + direction: string +) { + const tableQueryParams = getQueryParams({ + key, + depth, + direction, + column_name: columnName, + }); + return axios + .get(`${API_PATH}/get_column_lineage?${tableQueryParams}`) + .then((response: AxiosResponse) => ({ + data: response.data, + statusCode: response.status, + })) + .catch((e: AxiosError) => { + const { response } = e; + const status = response ? response.status : null; + return Promise.reject({ status }); + }); +} diff --git a/frontend/amundsen_application/static/js/ducks/lineage/index.spec.ts b/frontend/amundsen_application/static/js/ducks/lineage/index.spec.ts new file mode 100644 index 0000000000..b1f26ace4c --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/index.spec.ts @@ -0,0 +1,208 @@ +import { testSaga } from 'redux-saga-test-plan'; + +import { Lineage } from 'interfaces'; + +import * as API from './api/v0'; + +import { + getTableLineage, + getTableLineageFailure, + getTableLineageSuccess, + getColumnLineage, + getColumnLineageFailure, + getColumnLineageSuccess, + getTableColumnLineageSuccess, + getTableColumnLineageFailure, + initialLineageState, +} from './reducer'; + +import { + getTableLineageWatcher, + getTableLineageWorker, + getColumnLineageWatcher, + getColumnLineageWorker, +} from './sagas'; + +import { + GetTableLineage, + GetColumnLineage, + GetTableColumnLineage, +} from './types'; + +describe('tableMetadata ducks', () => { + let testLineage: Lineage; + + let columnName: string; + let testKey: string; + + beforeAll(() => { + testKey = 'tableKey'; + testLineage = { + upstream_entities: [ + { + badges: [], + cluster: 'cluster', + database: 'database', + key: 'key', + level: 1, + name: 'name', + schema: 'schema', + usage: 100, + parent: 'parent', + }, + ], + downstream_entities: [], + depth: 1, + direction: 'both', + key: 
testKey, + }; + + columnName = 'column_name'; + }); + + describe('actions', () => { + it('getTableLineage - returns the action to get table lineage', () => { + const action = getTableLineage(testKey); + const { payload } = action; + expect(action.type).toBe(GetTableLineage.REQUEST); + expect(payload.key).toBe(testKey); + }); + + it('getTableLineage - returns the action to process failure', () => { + const status = 500; + const action = getTableLineageFailure(status); + const { payload } = action; + expect(action.type).toBe(GetTableLineage.FAILURE); + expect(payload.lineageTree).toBe(initialLineageState); + expect(payload.statusCode).toBe(status); + }); + + it('getTableLineage - returns the action to process success', () => { + const status = 200; + const action = getTableLineageSuccess(testLineage, status); + const { payload } = action; + expect(action.type).toBe(GetTableLineage.SUCCESS); + expect(payload.lineageTree).toBe(testLineage); + expect(payload.statusCode).toBe(status); + }); + + it('getColumnLineage - returns the action to get column lineage', () => { + const action = getColumnLineage(testKey, columnName); + const { payload, meta } = action; + expect(action.type).toBe(GetColumnLineage.REQUEST); + expect(payload.key).toBe(testKey); + expect(payload.columnName).toBe(columnName); + expect(meta.analytics).not.toBeNull(); + }); + + it('getColumnLineage - returns the action to process failure', () => { + const status = 500; + const action = getColumnLineageFailure(status); + const { payload } = action; + expect(action.type).toBe(GetColumnLineage.FAILURE); + expect(payload.lineageTree).toBe(initialLineageState); + expect(payload.statusCode).toBe(status); + }); + + it('getColumnLineageSuccess - returns the action to process success', () => { + const status = 200; + const action = getColumnLineageSuccess(testLineage, status); + const { payload } = action; + expect(action.type).toBe(GetColumnLineage.SUCCESS); + expect(payload.lineageTree).toBe(testLineage); + 
expect(payload.statusCode).toBe(status); + }); + + it('getTableColumnLineageSuccess - returns the action to process success', () => { + const status = 200; + const action = getTableColumnLineageSuccess( + testLineage, + columnName, + status + ); + const { payload } = action; + expect(action.type).toBe(GetTableColumnLineage.SUCCESS); + expect(payload.columnName).toBe(columnName); + expect(payload.lineageTree).toBe(testLineage); + expect(payload.statusCode).toBe(status); + }); + + it('getTableColumnLineage - returns the action to process failure', () => { + const status = 500; + const action = getTableColumnLineageFailure(columnName, status); + const { payload } = action; + expect(action.type).toBe(GetTableColumnLineage.FAILURE); + expect(payload.columnName).toBe(columnName); + expect(payload.lineageTree).toBe(initialLineageState); + expect(payload.statusCode).toBe(status); + }); + }); + + describe('sagas', () => { + describe('getTableLineageWatcher', () => { + it('takes every GetTableLineage.REQUEST with getTableLineageWorker', () => { + testSaga(getTableLineageWatcher) + .next() + .takeEvery(GetTableLineage.REQUEST, getTableLineageWorker) + .next() + .isDone(); + }); + }); + + describe('getTableLineageWorker', () => { + it('executes flow for getting table lineage', () => { + testSaga(getTableLineageWorker, getTableLineage(testKey)) + .next() + .call(API.getTableLineage, testKey, 1, 'both') + .next({ data: testLineage, status: 200 }) + .put(getTableLineageSuccess(testLineage, 200)) + .next() + .isDone(); + }); + + it('handles request error', () => { + testSaga(getTableLineageWorker, getTableLineage(testKey)) + .next() + .call(API.getTableLineage, testKey, 1, 'both') + // @ts-ignore + .throw({ status: 500 }) + .put(getTableLineageFailure(500)) + .next() + .isDone(); + }); + }); + + describe('getColumnLineageWatcher', () => { + it('takes every GetColumnLineage.REQUEST with getColumnLineageWorker', () => { + testSaga(getColumnLineageWatcher) + .next() + 
.takeEvery(GetColumnLineage.REQUEST, getColumnLineageWorker) + .next() + .isDone(); + }); + }); + + describe('getColumnLineageWorker', () => { + it('executes flow for getting column lineage', () => { + testSaga(getColumnLineageWorker, getColumnLineage(testKey, columnName)) + .next() + .call(API.getColumnLineage, testKey, columnName, 1, 'both') + .next({ data: testLineage, status: 200 }) + .put(getColumnLineageSuccess(testLineage, 200)) + .next() + .isDone(); + }); + + it('handles request error', () => { + testSaga(getColumnLineageWorker, getColumnLineage(testKey, columnName)) + .next() + .call(API.getColumnLineage, testKey, columnName, 1, 'both') + // @ts-ignore + .throw({ status: 500 }) + .put(getColumnLineageFailure(500)) + .next() + .isDone(); + }); + }); + }); +}); diff --git a/frontend/amundsen_application/static/js/ducks/lineage/reducer.ts b/frontend/amundsen_application/static/js/ducks/lineage/reducer.ts new file mode 100644 index 0000000000..992e6e16e3 --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/reducer.ts @@ -0,0 +1,227 @@ +import { ColumnLineageMap, Lineage } from 'interfaces/Lineage'; +import { + GetColumnLineage, + GetColumnLineageRequest, + GetColumnLineageResponse, + GetTableLineage, + GetTableLineageRequest, + GetTableLineageResponse, + GetTableColumnLineage, + GetTableColumnLineageRequest, + GetTableColumnLineageResponse, +} from './types'; + +export const initialLineageState = { + upstream_entities: [], + downstream_entities: [], + depth: 0, + direction: 'both', + key: '', +}; + +export const initialState: LineageReducerState = { + lineageTree: initialLineageState, + statusCode: null, + isLoading: false, + // ToDo: Please remove once list based view is deprecated + columnLineageMap: {}, +}; + +/* ACTIONS */ +export function getTableLineage( + key: string, + depth: number = 1, + direction: string = 'both' +): GetTableLineageRequest { + return { + type: GetTableLineage.REQUEST, + payload: { key, depth, direction }, + }; 
+} + +export function getTableLineageSuccess( + data: Lineage, + statusCode: number +): GetTableLineageResponse { + return { + type: GetTableLineage.SUCCESS, + payload: { + lineageTree: data, + statusCode, + }, + }; +} + +export function getTableLineageFailure( + statusCode: number +): GetTableLineageResponse { + return { + type: GetTableLineage.FAILURE, + payload: { + lineageTree: initialLineageState, + statusCode, + }, + }; +} + +export function getColumnLineage( + key: string, + columnName: string, + depth: number = 1, + direction: string = 'both' +): GetColumnLineageRequest { + return { + type: GetColumnLineage.REQUEST, + payload: { key, depth, direction, columnName }, + meta: { + analytics: { + name: `getColumnLineage`, + payload: { + category: 'lineage', + label: `${key}/${columnName}`, + }, + }, + }, + }; +} + +export function getColumnLineageSuccess( + data: Lineage, + statusCode: number +): GetColumnLineageResponse { + return { + type: GetColumnLineage.SUCCESS, + payload: { + lineageTree: data, + statusCode, + }, + }; +} + +export function getColumnLineageFailure( + statusCode: number +): GetColumnLineageResponse { + return { + type: GetColumnLineage.FAILURE, + payload: { + lineageTree: initialLineageState, + statusCode, + }, + }; +} + +// ToDo: Please remove once list based view is deprecated +export function getTableColumnLineage( + key: string, + columnName: string +): GetTableColumnLineageRequest { + return { + type: GetTableColumnLineage.REQUEST, + payload: { key, columnName }, + meta: { + analytics: { + name: `getTableColumnLineage`, + payload: { + category: 'lineage', + label: `${key}/${columnName}`, + }, + }, + }, + }; +} + +// ToDo: Please remove once list based view is deprecated +export function getTableColumnLineageSuccess( + data: Lineage, + columnName: string, + statusCode: number +): GetTableColumnLineageResponse { + return { + type: GetTableColumnLineage.SUCCESS, + payload: { + columnName, + statusCode, + lineageTree: data, + }, + }; +} +// 
ToDo: Please remove once list based view is deprecated +export function getTableColumnLineageFailure( + columnName: string, + statusCode: number +): GetTableColumnLineageResponse { + return { + type: GetTableColumnLineage.FAILURE, + payload: { + columnName, + lineageTree: initialLineageState, + statusCode, + }, + }; +} + +/* REDUCER */ +export interface LineageReducerState { + statusCode: number | null; + lineageTree: Lineage; + isLoading: boolean; + // ToDo: Please remove once list based view is deprecated + columnLineageMap: ColumnLineageMap; +} + +export default function reducer( + state: LineageReducerState = initialState, + action +): LineageReducerState { + switch (action.type) { + case GetTableLineage.SUCCESS: + case GetTableLineage.FAILURE: + return { + ...state, + lineageTree: (action).payload.lineageTree, + statusCode: (action).payload.statusCode, + isLoading: false, + }; + case GetColumnLineage.SUCCESS: + case GetColumnLineage.FAILURE: + return { + ...state, + lineageTree: (action).payload.lineageTree, + statusCode: (action).payload.statusCode, + isLoading: false, + }; + // ToDo: Please remove once list based view is deprecated + case GetTableColumnLineage.REQUEST: { + const { columnName } = (action).payload; + return { + ...state, + columnLineageMap: { + ...state.columnLineageMap, + [columnName]: { + lineageTree: initialLineageState, + isLoading: true, + }, + }, + }; + } + // ToDo: Please remove once list based view is deprecated + case GetTableColumnLineage.SUCCESS: + case GetTableColumnLineage.FAILURE: { + const { columnName, lineageTree: columnLineage } = (< + GetTableColumnLineageResponse + >action).payload; + return { + ...state, + columnLineageMap: { + ...state.columnLineageMap, + [columnName]: { + lineageTree: columnLineage, + isLoading: false, + }, + }, + }; + } + default: + return state; + } +} diff --git a/frontend/amundsen_application/static/js/ducks/lineage/sagas.ts b/frontend/amundsen_application/static/js/ducks/lineage/sagas.ts new file 
mode 100644 index 0000000000..95d5b26edb --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/sagas.ts @@ -0,0 +1,85 @@ +import { SagaIterator } from 'redux-saga'; +import { call, put, takeEvery } from 'redux-saga/effects'; +import * as API from './api/v0'; +import { + getColumnLineageFailure, + getColumnLineageSuccess, + getTableLineageFailure, + getTableLineageSuccess, + getTableColumnLineageSuccess, + getTableColumnLineageFailure, +} from './reducer'; + +import { + GetColumnLineage, + GetColumnLineageRequest, + GetTableLineage, + GetTableLineageRequest, + GetTableColumnLineage, + GetTableColumnLineageRequest, +} from './types'; + +export function* getTableLineageWorker( + action: GetTableLineageRequest +): SagaIterator { + const { key, depth, direction } = action.payload; + try { + const response = yield call(API.getTableLineage, key, depth, direction); + const { data, status } = response; + yield put(getTableLineageSuccess(data, status)); + } catch (error) { + const { status } = error; + yield put(getTableLineageFailure(status)); + } +} +export function* getTableLineageWatcher(): SagaIterator { + yield takeEvery(GetTableLineage.REQUEST, getTableLineageWorker); +} + +export function* getColumnLineageWorker( + action: GetColumnLineageRequest +): SagaIterator { + const { key, columnName, depth, direction } = action.payload; + try { + const response = yield call( + API.getColumnLineage, + key, + columnName, + depth, + direction + ); + const { data, status } = response; + yield put(getColumnLineageSuccess(data, status)); + } catch (error) { + const { status } = error; + yield put(getColumnLineageFailure(status)); + } +} +export function* getColumnLineageWatcher(): SagaIterator { + yield takeEvery(GetColumnLineage.REQUEST, getColumnLineageWorker); +} + +// ToDo: Please remove once list based view is deprecated +export function* getTableColumnLineageWorker( + action: GetTableColumnLineageRequest +): SagaIterator { + const { key, columnName } = 
action.payload; + try { + const response = yield call( + API.getColumnLineage, + key, + columnName, + 1, + 'both' + ); + const { data, status } = response; + yield put(getTableColumnLineageSuccess(data, columnName, status)); + } catch (error) { + const { status } = error; + yield put(getTableColumnLineageFailure(columnName, status)); + } +} +// ToDo: Please remove once list based view is deprecated +export function* getTableColumnLineageWatcher(): SagaIterator { + yield takeEvery(GetTableColumnLineage.REQUEST, getTableColumnLineageWorker); +} diff --git a/frontend/amundsen_application/static/js/ducks/lineage/types.ts b/frontend/amundsen_application/static/js/ducks/lineage/types.ts new file mode 100644 index 0000000000..676ba4e0a9 --- /dev/null +++ b/frontend/amundsen_application/static/js/ducks/lineage/types.ts @@ -0,0 +1,73 @@ +import { Lineage, AnalyticsEvent } from 'interfaces'; + +export enum GetTableLineage { + REQUEST = 'amundsen/lineage/GET_TABLE_LINEAGE_REQUEST', + SUCCESS = 'amundsen/lineage/GET_TABLE_LINEAGE_SUCCESS', + FAILURE = 'amundsen/lineage/GET_TABLE_LINEAGE_FAILURE', +} +export interface GetTableLineageRequest { + type: GetTableLineage.REQUEST; + payload: { + key: string; + direction: string; + depth: number; + }; +} +export interface GetTableLineageResponse { + type: GetTableLineage.SUCCESS | GetTableLineage.FAILURE; + payload: { + lineageTree: Lineage; + statusCode: number; + }; +} + +export enum GetColumnLineage { + REQUEST = 'amundsen/lineage/GET_COLUMN_LINEAGE_REQUEST', + SUCCESS = 'amundsen/lineage/GET_COLUMN_LINEAGE_SUCCESS', + FAILURE = 'amundsen/lineage/GET_COLUMN_LINEAGE_FAILURE', +} +export interface GetColumnLineageRequest { + type: GetColumnLineage.REQUEST; + payload: { + key: string; + columnName: string; + direction: string; + depth: number; + }; + meta: { + analytics: AnalyticsEvent; + }; +} +export interface GetColumnLineageResponse { + type: GetColumnLineage.SUCCESS | GetColumnLineage.FAILURE; + payload: { + lineageTree: Lineage; 
+ statusCode: number; + }; +} + +// To keep the backward compatibility for the list based lineage on table detail page +// ToDo: Please remove once list based view is deprecated +export enum GetTableColumnLineage { + REQUEST = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_REQUEST', + SUCCESS = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_SUCCESS', + FAILURE = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_FAILURE', +} +export interface GetTableColumnLineageRequest { + type: GetTableColumnLineage.REQUEST; + payload: { + key: string; + columnName: string; + }; + meta: { + analytics: AnalyticsEvent; + }; +} +export interface GetTableColumnLineageResponse { + type: GetTableColumnLineage.SUCCESS | GetTableColumnLineage.FAILURE; + payload: { + lineageTree: Lineage; + columnName: string; + statusCode: number; + }; +} diff --git a/frontend/amundsen_application/static/js/ducks/rootReducer.ts b/frontend/amundsen_application/static/js/ducks/rootReducer.ts index a94b2ae4a4..74d3ebe0ea 100644 --- a/frontend/amundsen_application/static/js/ducks/rootReducer.ts +++ b/frontend/amundsen_application/static/js/ducks/rootReducer.ts @@ -20,6 +20,7 @@ import ui, { UIReducerState } from './ui'; import bookmarks, { BookmarkReducerState } from './bookmark/reducer'; import notification, { NotificationReducerState } from './notification/reducer'; import issue, { IssueReducerState } from './issue/reducer'; +import lineage, { LineageReducerState } from './lineage/reducer'; export interface GlobalState { announcements: AnnouncementsReducerState; @@ -35,6 +36,7 @@ export interface GlobalState { tags: TagsReducerState; user: UserReducerState; ui: UIReducerState; + lineage: LineageReducerState; } const rootReducer = combineReducers({ @@ -51,6 +53,7 @@ const rootReducer = combineReducers({ tags, user, ui, + lineage, }); export default rootReducer; diff --git a/frontend/amundsen_application/static/js/ducks/rootSaga.ts b/frontend/amundsen_application/static/js/ducks/rootSaga.ts index 02f480a6e5..30d0fc1610 
100644 --- a/frontend/amundsen_application/static/js/ducks/rootSaga.ts +++ b/frontend/amundsen_application/static/js/ducks/rootSaga.ts @@ -43,9 +43,7 @@ import { filterWatcher } from './search/filters/sagas'; import { updateTableOwnerWatcher } from './tableMetadata/owners/sagas'; import { getTableDataWatcher, - getTableLineageWatcher, getColumnDescriptionWatcher, - getColumnLineageWatcher, getPreviewDataWatcher, getTableDescriptionWatcher, updateColumnDescriptionWatcher, @@ -66,6 +64,13 @@ import { getUserWatcher, } from './user/sagas'; +// Lineage +import { + getColumnLineageWatcher, + getTableLineageWatcher, + getTableColumnLineageWatcher, +} from './lineage/sagas'; + export default function* rootSaga() { yield all([ // AnnouncementPage @@ -103,9 +108,9 @@ export default function* rootSaga() { updateResourceTagsWatcher(), // TableDetail getTableDataWatcher(), - getTableLineageWatcher(), + getColumnDescriptionWatcher(), - getColumnLineageWatcher(), + getPreviewDataWatcher(), getTableDescriptionWatcher(), updateColumnDescriptionWatcher(), @@ -118,5 +123,9 @@ export default function* rootSaga() { getUserWatcher(), getUserOwnWatcher(), getUserReadWatcher(), + // Lineage + getTableLineageWatcher(), + getColumnLineageWatcher(), + getTableColumnLineageWatcher(), ]); } diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/api/v0.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/api/v0.ts index c798f4223f..b0ac9f1691 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/api/v0.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/api/v0.ts @@ -182,40 +182,3 @@ export function getPreviewData(queryParams: PreviewQueryParams) { return Promise.reject({ data, status }); }); } - -export function getTableLineage(key: string) { - const tableQueryParams = getTableQueryParams({ key }); - return axios({ - url: `${API_PATH}/get_table_lineage?${tableQueryParams}`, - method: 'GET', - }) - .then((response: 
AxiosResponse) => ({ - data: response.data, - status: response.status, - })) - .catch((e: AxiosError) => { - const { response } = e; - const status = response ? response.status : null; - return Promise.reject({ status }); - }); -} - -export function getColumnLineage(key: string, columnName: string) { - const tableQueryParams = getTableQueryParams({ - key, - column_name: columnName, - }); - return axios({ - url: `${API_PATH}/get_column_lineage?${tableQueryParams}`, - method: 'GET', - }) - .then((response: AxiosResponse) => ({ - data: response.data, - status: response.status, - })) - .catch((e: AxiosError) => { - const { response } = e; - const status = response ? response.status : null; - return Promise.reject({ status }); - }); -} diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/index.spec.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/index.spec.ts index c9c524156c..54a1eec26f 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/index.spec.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/index.spec.ts @@ -1,7 +1,6 @@ import { testSaga } from 'redux-saga-test-plan'; import { - Lineage, PreviewData, PreviewQueryParams, TableMetadata, @@ -33,13 +32,6 @@ import reducer, { initialTableDataState, initialState, TableMetadataReducerState, - getTableLineage, - getTableLineageFailure, - initialTableLineageState, - getTableLineageSuccess, - getColumnLineage, - getColumnLineageFailure, - getColumnLineageSuccess, } from './reducer'; import { @@ -55,10 +47,6 @@ import { updateColumnDescriptionWorker, getPreviewDataWatcher, getPreviewDataWorker, - getTableLineageWatcher, - getTableLineageWorker, - getColumnLineageWatcher, - getColumnLineageWorker, } from './sagas'; import { @@ -68,8 +56,6 @@ import { GetColumnDescription, UpdateColumnDescription, GetPreviewData, - GetTableLineage, - GetColumnLineage, } from './types'; describe('tableMetadata ducks', () => { @@ -82,7 +68,6 @@ describe('tableMetadata 
ducks', () => { let testKey: string; let testIndex: string; let testSource: string; - let testLineage: Lineage; let columnName: string; let columnIndex: number; @@ -113,22 +98,6 @@ describe('tableMetadata ducks', () => { testKey = 'tableKey'; testIndex = '3'; testSource = 'search'; - testLineage = { - upstream_entities: [ - { - badges: [], - cluster: 'cluster', - database: 'database', - key: 'key', - level: 1, - name: 'name', - schema: 'schema', - usage: 100, - }, - ], - downstream_entities: [], - }; - columnIndex = 2; columnName = 'column_name'; emptyPreviewData = { @@ -289,60 +258,6 @@ describe('tableMetadata ducks', () => { expect(payload.data).toBe(previewData); expect(payload.status).toBe(status); }); - - it('getTableLineage - returns the action to get table lineage', () => { - const action = getTableLineage(testKey); - const { payload } = action; - expect(action.type).toBe(GetTableLineage.REQUEST); - expect(payload.key).toBe(testKey); - }); - - it('getTableLineage - returns the action to process failure', () => { - const status = 500; - const action = getTableLineageFailure(status); - const { payload } = action; - expect(action.type).toBe(GetTableLineage.FAILURE); - expect(payload.lineage).toBe(initialTableLineageState.lineage); - expect(payload.status).toBe(status); - }); - - it('getTableLineage - returns the action to process success', () => { - const status = 200; - const action = getTableLineageSuccess(testLineage, status); - const { payload } = action; - expect(action.type).toBe(GetTableLineage.SUCCESS); - expect(payload.lineage).toBe(testLineage); - expect(payload.status).toBe(status); - }); - - it('getColumnLineage - returns the action to get column lineage', () => { - const action = getColumnLineage(testKey, columnName); - const { payload, meta } = action; - expect(action.type).toBe(GetColumnLineage.REQUEST); - expect(payload.key).toBe(testKey); - expect(payload.columnName).toBe(columnName); - expect(meta.analytics).not.toBeNull(); - }); - - 
it('getColumnLineage - returns the action to process failure', () => { - const status = 500; - const action = getColumnLineageFailure(columnName, status); - const { payload } = action; - expect(action.type).toBe(GetColumnLineage.FAILURE); - expect(payload.columnName).toBe(columnName); - expect(payload.lineage).toBe(initialTableLineageState.lineage); - expect(payload.status).toBe(status); - }); - - it('getColumnLineage - returns the action to process success', () => { - const status = 200; - const action = getColumnLineageSuccess(testLineage, columnName, status); - const { payload } = action; - expect(action.type).toBe(GetColumnLineage.SUCCESS); - expect(payload.columnName).toBe(columnName); - expect(payload.lineage).toBe(testLineage); - expect(payload.status).toBe(status); - }); }); /* TODO: Code involving nested reducers is not covered, will need more investigation */ @@ -786,71 +701,5 @@ describe('tableMetadata ducks', () => { .isDone(); }); }); - - describe('getTableLineageWatcher', () => { - it('takes every GetTableLineage.REQUEST with getTableLineageWorker', () => { - testSaga(getTableLineageWatcher) - .next() - .takeEvery(GetTableLineage.REQUEST, getTableLineageWorker) - .next() - .isDone(); - }); - }); - - describe('getTableLineageWorker', () => { - it('executes flow for getting table lineage', () => { - testSaga(getTableLineageWorker, getTableLineage(testKey)) - .next() - .call(API.getTableLineage, testKey) - .next({ data: testLineage, status: 200 }) - .put(getTableLineageSuccess(testLineage, 200)) - .next() - .isDone(); - }); - - it('handles request error', () => { - testSaga(getTableLineageWorker, getTableLineage(testKey)) - .next() - .call(API.getTableLineage, testKey) - // @ts-ignore - .throw({ status: 500 }) - .put(getTableLineageFailure(500)) - .next() - .isDone(); - }); - }); - - describe('getColumnLineageWatcher', () => { - it('takes every GetColumnLineage.REQUEST with getColumnLineageWorker', () => { - testSaga(getColumnLineageWatcher) - .next() - 
.takeEvery(GetColumnLineage.REQUEST, getColumnLineageWorker) - .next() - .isDone(); - }); - }); - - describe('getColumnLineageWorker', () => { - it('executes flow for getting column lineage', () => { - testSaga(getColumnLineageWorker, getColumnLineage(testKey, columnName)) - .next() - .call(API.getColumnLineage, testKey, columnName) - .next({ data: testLineage, status: 200 }) - .put(getColumnLineageSuccess(testLineage, columnName, 200)) - .next() - .isDone(); - }); - - it('handles request error', () => { - testSaga(getColumnLineageWorker, getColumnLineage(testKey, columnName)) - .next() - .call(API.getColumnLineage, testKey, columnName) - // @ts-ignore - .throw({ status: 500 }) - .put(getColumnLineageFailure(columnName, 500)) - .next() - .isDone(); - }); - }); }); }); diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/reducer.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/reducer.ts index 9848ac6633..6788772c21 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/reducer.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/reducer.ts @@ -1,7 +1,5 @@ import { - ColumnLineageMap, DashboardResource, - Lineage, OwnerDict, PreviewData, PreviewQueryParams, @@ -29,12 +27,6 @@ import { GetPreviewDataRequest, GetPreviewDataResponse, UpdateTableOwner, - GetTableLineageResponse, - GetTableLineage, - GetTableLineageRequest, - GetColumnLineageResponse, - GetColumnLineage, - GetColumnLineageRequest, } from './types'; import tableOwnersReducer, { @@ -68,27 +60,14 @@ export const initialTableDataState: TableMetadata = { programmatic_descriptions: {}, }; -export const initialTableLineageState = { - lineage: { - upstream_entities: [], - downstream_entities: [], - }, - status: null, -}; - -export const emptyLineage = { - upstream_entities: [], - downstream_entities: [], -}; - export const initialState: TableMetadataReducerState = { isLoading: true, preview: initialPreviewState, statusCode: null, tableData: 
initialTableDataState, tableOwners: initialOwnersState, - tableLineage: initialTableLineageState, - columnLineageMap: {}, + // tableLineage: initialTableLineageState, + // columnLineageMap: {}, }; /* ACTIONS */ @@ -279,86 +258,6 @@ export function getPreviewDataSuccess( }; } -export function getTableLineage(key: string): GetTableLineageRequest { - return { - type: GetTableLineage.REQUEST, - payload: { key }, - }; -} - -export function getTableLineageSuccess( - data: Lineage, - status: number -): GetTableLineageResponse { - return { - type: GetTableLineage.SUCCESS, - payload: { - lineage: data, - status, - }, - }; -} - -export function getTableLineageFailure( - status: number -): GetTableLineageResponse { - return { - type: GetTableLineage.FAILURE, - payload: { - lineage: initialTableLineageState.lineage, - status, - }, - }; -} - -export function getColumnLineage( - key: string, - columnName: string -): GetColumnLineageRequest { - return { - type: GetColumnLineage.REQUEST, - payload: { key, columnName }, - meta: { - analytics: { - name: `getColumnLineage`, - payload: { - category: 'lineage', - label: `${key}/${columnName}`, - }, - }, - }, - }; -} - -export function getColumnLineageSuccess( - data: Lineage, - columnName: string, - status: number -): GetColumnLineageResponse { - return { - type: GetColumnLineage.SUCCESS, - payload: { - columnName, - status, - lineage: data, - }, - }; -} - -export function getColumnLineageFailure( - columnName: string, - status: number -): GetColumnLineageResponse { - return { - type: GetColumnLineage.FAILURE, - payload: { - columnName, - lineage: initialTableLineageState.lineage, - status, - }, - }; -} - /* REDUCER */ export interface TableMetadataReducerState { dashboards?: { @@ -374,11 +273,6 @@ export interface TableMetadataReducerState { statusCode: number | null; tableData: TableMetadata; tableOwners: TableOwnerReducerState; - tableLineage: { - status: number | null; - lineage: Lineage; - }; - columnLineageMap: ColumnLineageMap; 
} export default function reducer( @@ -436,44 +330,6 @@ export default function reducer( ...state, tableOwners: tableOwnersReducer(state.tableOwners, action), }; - case GetTableLineage.SUCCESS: - case GetTableLineage.FAILURE: - return { - ...state, - tableLineage: { - lineage: (action).payload.lineage, - status: (action).payload.status, - }, - }; - case GetColumnLineage.REQUEST: { - const { columnName } = (action).payload; - return { - ...state, - columnLineageMap: { - ...state.columnLineageMap, - [columnName]: { - lineage: emptyLineage, - isLoading: true, - }, - }, - }; - } - case GetColumnLineage.SUCCESS: - case GetColumnLineage.FAILURE: { - const { columnName, lineage: columnLineage } = (< - GetColumnLineageResponse - >action).payload; - return { - ...state, - columnLineageMap: { - ...state.columnLineageMap, - [columnName]: { - lineage: columnLineage, - isLoading: false, - }, - }, - }; - } default: return state; } diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/sagas.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/sagas.ts index fd9e21994e..126275e8a7 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/sagas.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/sagas.ts @@ -13,10 +13,6 @@ import { getColumnDescriptionSuccess, getPreviewDataFailure, getPreviewDataSuccess, - getTableLineageSuccess, - getTableLineageFailure, - getColumnLineageSuccess, - getColumnLineageFailure, } from './reducer'; import { @@ -32,10 +28,6 @@ import { UpdateColumnDescriptionRequest, UpdateTableDescription, UpdateTableDescriptionRequest, - GetTableLineageRequest, - GetTableLineage, - GetColumnLineageRequest, - GetColumnLineage, } from './types'; export function* getTableDataWorker(action: GetTableDataRequest): SagaIterator { @@ -183,36 +175,3 @@ export function* getPreviewDataWorker( export function* getPreviewDataWatcher(): SagaIterator { yield takeLatest(GetPreviewData.REQUEST, getPreviewDataWorker); } - 
-export function* getTableLineageWorker( - action: GetTableLineageRequest -): SagaIterator { - try { - const response = yield call(API.getTableLineage, action.payload.key); - const { data, status } = response; - yield put(getTableLineageSuccess(data, status)); - } catch (error) { - const { status } = error; - yield put(getTableLineageFailure(status)); - } -} -export function* getTableLineageWatcher(): SagaIterator { - yield takeEvery(GetTableLineage.REQUEST, getTableLineageWorker); -} - -export function* getColumnLineageWorker( - action: GetColumnLineageRequest -): SagaIterator { - const { key, columnName } = action.payload; - try { - const response = yield call(API.getColumnLineage, key, columnName); - const { data, status } = response; - yield put(getColumnLineageSuccess(data, columnName, status)); - } catch (error) { - const { status } = error; - yield put(getColumnLineageFailure(columnName, status)); - } -} -export function* getColumnLineageWatcher(): SagaIterator { - yield takeEvery(GetColumnLineage.REQUEST, getColumnLineageWorker); -} diff --git a/frontend/amundsen_application/static/js/ducks/tableMetadata/types.ts b/frontend/amundsen_application/static/js/ducks/tableMetadata/types.ts index 6d8107f4ff..ff050941d5 100644 --- a/frontend/amundsen_application/static/js/ducks/tableMetadata/types.ts +++ b/frontend/amundsen_application/static/js/ducks/tableMetadata/types.ts @@ -1,5 +1,4 @@ import { - AnalyticsEvent, DashboardResource, OwnerDict, PreviewData, @@ -7,7 +6,6 @@ import { TableMetadata, UpdateOwnerPayload, Tag, - Lineage, } from 'interfaces'; export enum GetTableData { @@ -156,46 +154,3 @@ export interface UpdateTableOwnerResponse { owners: OwnerDict; }; } - -export enum GetTableLineage { - REQUEST = 'amundsen/tableMetadata/GET_TABLE_LINEAGE_REQUEST', - SUCCESS = 'amundsen/tableMetadata/GET_TABLE_LINEAGE_SUCCESS', - FAILURE = 'amundsen/tableMetadata/GET_TABLE_LINEAGE_FAILURE', -} -export interface GetTableLineageRequest { - type: GetTableLineage.REQUEST; 
- payload: { - key: string; - }; -} -export interface GetTableLineageResponse { - type: GetTableLineage.SUCCESS | GetTableLineage.FAILURE; - payload: { - lineage: Lineage; - status: number; - }; -} - -export enum GetColumnLineage { - REQUEST = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_REQUEST', - SUCCESS = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_SUCCESS', - FAILURE = 'amundsen/tableMetadata/GET_COLUMN_LINEAGE_FAILURE', -} -export interface GetColumnLineageRequest { - type: GetColumnLineage.REQUEST; - payload: { - key: string; - columnName: string; - }; - meta: { - analytics: AnalyticsEvent; - }; -} -export interface GetColumnLineageResponse { - type: GetColumnLineage.SUCCESS | GetColumnLineage.FAILURE; - payload: { - lineage: Lineage; - columnName: string; - status: number; - }; -} diff --git a/frontend/amundsen_application/static/js/ducks/utilMethods.ts b/frontend/amundsen_application/static/js/ducks/utilMethods.ts index 6de2ea770a..ae5c182ea6 100644 --- a/frontend/amundsen_application/static/js/ducks/utilMethods.ts +++ b/frontend/amundsen_application/static/js/ducks/utilMethods.ts @@ -1,4 +1,5 @@ import { Tag } from 'interfaces'; +import * as qs from 'simple-query-string'; export function sortTagsAlphabetical(a: Tag, b: Tag): number { return a.tag_name.localeCompare(b.tag_name); @@ -27,3 +28,12 @@ export function filterFromObj( return obj; }, {}); } + +/** + * Takes a parameter objects and generates the query string parameters needed for requests. 
+ * Example: + * const queryParameters = getQueryParams({key: tableData.key, column_name: columnName}) + */ +export function getQueryParams(params: object): string { + return qs.stringify(params); +} diff --git a/frontend/amundsen_application/static/js/features/ColumnList/ColumnLineage/index.tsx b/frontend/amundsen_application/static/js/features/ColumnList/ColumnLineage/index.tsx index 5fe3f50e83..af8d1c0d70 100644 --- a/frontend/amundsen_application/static/js/features/ColumnList/ColumnLineage/index.tsx +++ b/frontend/amundsen_application/static/js/features/ColumnList/ColumnLineage/index.tsx @@ -5,9 +5,10 @@ import * as React from 'react'; import { connect } from 'react-redux'; import { GlobalState } from 'ducks/rootReducer'; -import { emptyLineage } from 'ducks/tableMetadata/reducer'; +import { initialLineageState } from 'ducks/lineage/reducer'; import { getColumnLineageLink } from 'config/config-utils'; -import { Lineage, LineageItem, TableMetadata } from 'interfaces/TableMetadata'; +import { TableMetadata } from 'interfaces/TableMetadata'; +import { Lineage, LineageItem } from 'interfaces/Lineage'; import { logClick } from 'utils/analytics'; import ColumnLineageLoader from '../ColumnLineageLoader'; import { @@ -135,10 +136,11 @@ export const mapStateToProps = ( state: GlobalState, ownProps: ColumnLineageListOwnProps ) => { - const { columnLineageMap, tableData } = state.tableMetadata; + const { tableData } = state.tableMetadata; + const { columnLineageMap } = state.lineage; const columnStateObject = columnLineageMap[ownProps.columnName]; const lineage = - (columnStateObject && columnStateObject.lineage) || emptyLineage; + (columnStateObject && columnStateObject.lineageTree) || initialLineageState; const isLoading = columnStateObject && columnStateObject.isLoading; return { tableData, diff --git a/frontend/amundsen_application/static/js/features/ColumnList/index.tsx b/frontend/amundsen_application/static/js/features/ColumnList/index.tsx index 
97096e7549..ecbfbf3743 100644 --- a/frontend/amundsen_application/static/js/features/ColumnList/index.tsx +++ b/frontend/amundsen_application/static/js/features/ColumnList/index.tsx @@ -7,8 +7,6 @@ import { connect } from 'react-redux'; import { bindActionCreators } from 'redux'; import { OpenRequestAction } from 'ducks/notification/types'; -import { GetColumnLineageRequest } from 'ducks/tableMetadata/types'; -import { getColumnLineage } from 'ducks/tableMetadata/reducer'; import EditableSection from 'components/EditableSection'; import Table, { @@ -36,6 +34,8 @@ import { import BadgeList from 'features/BadgeList'; import { getUniqueValues, filterOutUniqueValues } from 'utils/stats'; import { logAction } from 'utils/analytics'; +import { getTableColumnLineage } from 'ducks/lineage/reducer'; +import { GetTableColumnLineageRequest } from 'ducks/lineage/types'; import ColumnLineage from 'features/ColumnList/ColumnLineage'; import ColumnType from './ColumnType'; import ColumnDescEditableText from './ColumnDescEditableText'; @@ -67,7 +67,7 @@ export interface DispatchFromProps { getColumnLineageDispatch: ( key: string, columnName: string - ) => GetColumnLineageRequest; + ) => GetTableColumnLineageRequest; } export type ColumnListProps = ComponentProps & DispatchFromProps; @@ -373,7 +373,10 @@ const ColumnList: React.FC = ({ }; export const mapDispatchToProps = (dispatch: any) => - bindActionCreators({ getColumnLineageDispatch: getColumnLineage }, dispatch); + bindActionCreators( + { getColumnLineageDispatch: getTableColumnLineage }, + dispatch + ); export default connect<{}, DispatchFromProps, ComponentProps>( null, diff --git a/frontend/amundsen_application/static/js/fixtures/globalState.ts b/frontend/amundsen_application/static/js/fixtures/globalState.ts index ba8a7b9189..0ba36f4f9e 100644 --- a/frontend/amundsen_application/static/js/fixtures/globalState.ts +++ b/frontend/amundsen_application/static/js/fixtures/globalState.ts @@ -178,18 +178,10 @@ const globalState: 
GlobalState = { watermarks: [], programmatic_descriptions: {}, }, - tableLineage: { - lineage: { - upstream_entities: [], - downstream_entities: [], - }, - status: null, - }, tableOwners: { isLoading: true, owners: {}, }, - columnLineageMap: {}, }, lastIndexed: { lastIndexed: 1555632106 }, tags: { @@ -265,6 +257,18 @@ const globalState: GlobalState = { }, }, ui: {}, + lineage: { + lineageTree: { + upstream_entities: [], + downstream_entities: [], + key: '', + direction: 'both', + depth: 1, + }, + statusCode: null, + isLoading: false, + columnLineageMap: {}, + }, }; export default globalState; diff --git a/frontend/amundsen_application/static/js/fixtures/metadata/table.ts b/frontend/amundsen_application/static/js/fixtures/metadata/table.ts index 2efb47acef..ad275d7ba0 100644 --- a/frontend/amundsen_application/static/js/fixtures/metadata/table.ts +++ b/frontend/amundsen_application/static/js/fixtures/metadata/table.ts @@ -175,6 +175,9 @@ export const tableMetadata: TableMetadata = { export const emptyTableLineage: Lineage = { downstream_entities: [], upstream_entities: [], + key: 'database://cluster.schema/table_name', + depth: 1, + direction: 'both', }; export const tableLineage: Lineage = { @@ -188,6 +191,7 @@ export const tableLineage: Lineage = { name: 'table_name', schema: 'schema', usage: 1398, + parent: 'database://cluster.schema/parent_table_name', }, ], upstream_entities: [ @@ -200,8 +204,12 @@ export const tableLineage: Lineage = { name: 'table_name', schema: 'schema', usage: 1398, + parent: 'database://cluster.schema/parent_table_name', }, ], + key: 'database://cluster.schema/table_name', + depth: 1, + direction: 'both', }; export const relatedDashboards: DashboardResource[] = [ diff --git a/frontend/amundsen_application/static/js/interfaces/Lineage.ts b/frontend/amundsen_application/static/js/interfaces/Lineage.ts new file mode 100644 index 0000000000..ee66e2d359 --- /dev/null +++ b/frontend/amundsen_application/static/js/interfaces/Lineage.ts @@ -0,0 
+1,43 @@ +import { Badge } from './Badges'; + +export interface LineageItem { + badges: Badge[]; + cluster: string; + database: string; + key: string; + level: number; + name: string; + schema: string; + parent: string; + usage: number; +} + +export interface Lineage { + key: string; + direction: string; + depth: number; + downstream_entities: LineageItem[]; + upstream_entities: LineageItem[]; +} + +export interface TableLineageParams { + key: string; + direction: string; + depth: number; +} + +export interface ColumnLineageParams { + key: string; + direction: string; + depth: number; + column: string; +} + +// To keep the backward compatibility for the list based lineage +// ToDo: Please remove once list based view is deprecated +export interface ColumnLineageMap { + [columnName: string]: { + lineageTree: Lineage; + isLoading: boolean; + }; +} diff --git a/frontend/amundsen_application/static/js/interfaces/TableMetadata.ts b/frontend/amundsen_application/static/js/interfaces/TableMetadata.ts index 9cb42bcb29..f0042efa9f 100644 --- a/frontend/amundsen_application/static/js/interfaces/TableMetadata.ts +++ b/frontend/amundsen_application/static/js/interfaces/TableMetadata.ts @@ -124,35 +124,3 @@ export interface Watermark { partition_value: string; watermark_type: string; } - -export interface LineageItem { - badges: Badge[]; - cluster: string; - database: string; - key: string; - level: number; - name: string; - schema: string; - usage: number; -} - -export interface Lineage { - downstream_entities: LineageItem[]; - upstream_entities: LineageItem[]; -} - -export interface ColumnLineageMap { - [columnName: string]: { - lineage: Lineage; - isLoading: boolean; - }; -} - -export interface TableLineageParams { - key: string; -} - -export interface ColumnLineageParams { - key: string; - column: string; -} diff --git a/frontend/amundsen_application/static/js/interfaces/index.ts b/frontend/amundsen_application/static/js/interfaces/index.ts index 792a1cc231..5dc93968b9 
100644 --- a/frontend/amundsen_application/static/js/interfaces/index.ts +++ b/frontend/amundsen_application/static/js/interfaces/index.ts @@ -9,3 +9,4 @@ export * from './Resources'; export * from './TableMetadata'; export * from './Tags'; export * from './User'; +export * from './Lineage'; diff --git a/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageLink/index.tsx b/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageLink/index.tsx index a9b7d373b8..e4cfb31b84 100644 --- a/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageLink/index.tsx +++ b/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageLink/index.tsx @@ -16,7 +16,7 @@ const LineageLink: React.FC = ({ tableData, }: LineageLinkProps) => { const config = AppConfig.tableLineage; - if (!config.isEnabled) return null; + if (!config.externalEnabled) return null; const { database, cluster, schema, name } = tableData; const href = config.urlGenerator(database, cluster, schema, name); diff --git a/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageList/index.tsx b/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageList/index.tsx index 7b605b6287..9f4076575f 100644 --- a/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageList/index.tsx +++ b/frontend/amundsen_application/static/js/pages/TableDetailPage/LineageList/index.tsx @@ -4,7 +4,7 @@ import * as React from 'react'; import { ResourceType, TableResource } from 'interfaces/Resources'; -import { LineageItem } from 'interfaces/TableMetadata'; +import { LineageItem } from 'interfaces/Lineage'; import TableListItem from 'components/ResourceListItem/TableListItem'; export interface LineageListProps { diff --git a/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx b/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx index f72ce929ab..7f8e8dd573 100644 --- 
a/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx +++ b/frontend/amundsen_application/static/js/pages/TableDetailPage/index.tsx @@ -9,13 +9,12 @@ import { bindActionCreators } from 'redux'; import { RouteComponentProps } from 'react-router'; import { GlobalState } from 'ducks/rootReducer'; -import { getTableData, getTableLineage } from 'ducks/tableMetadata/reducer'; +import { getTableData } from 'ducks/tableMetadata/reducer'; +import { getTableLineage } from 'ducks/lineage/reducer'; import { openRequestDescriptionDialog } from 'ducks/notification/reducer'; import { updateSearchState } from 'ducks/search/reducer'; -import { - GetTableDataRequest, - GetTableLineageRequest, -} from 'ducks/tableMetadata/types'; +import { GetTableDataRequest } from 'ducks/tableMetadata/types'; +import { GetTableLineageRequest } from 'ducks/lineage/types'; import { OpenRequestAction } from 'ducks/notification/types'; import { UpdateSearchStateRequest } from 'ducks/search/types'; @@ -513,7 +512,7 @@ export const mapStateToProps = (state: GlobalState) => ({ isLoading: state.tableMetadata.isLoading, statusCode: state.tableMetadata.statusCode, tableData: state.tableMetadata.tableData, - tableLineage: state.tableMetadata.tableLineage.lineage, + tableLineage: state.lineage.lineageTree, numRelatedDashboards: state.tableMetadata.dashboards ? 
state.tableMetadata.dashboards.dashboards.length : 0, diff --git a/metadata/metadata_service/proxy/neo4j_proxy.py b/metadata/metadata_service/proxy/neo4j_proxy.py index 7dac8273a9..8c69a2d0ee 100644 --- a/metadata/metadata_service/proxy/neo4j_proxy.py +++ b/metadata/metadata_service/proxy/neo4j_proxy.py @@ -410,7 +410,7 @@ def _put_resource_description(self, *, 'key': uri}) if not result.single(): - raise RuntimeError('Failed to update the resource {uri} description'.format(uri=uri)) + raise NotFoundException('Failed to update the description as resource {uri} does not exist'.format(uri=uri)) # end neo4j transaction tx.commit() @@ -506,8 +506,8 @@ def put_column_description(self, *, 'column_key': column_uri}) if not result.single(): - raise RuntimeError('Failed to update the table {tbl} ' - 'column {col} description'.format(tbl=table_uri, + raise NotFoundException('Failed to update the table {tbl} ' + 'column {col} description as either table or column does not exist'.format(tbl=table_uri, col=column_uri)) # end neo4j transaction @@ -1526,63 +1526,63 @@ def get_lineage(self, *, get_both_lineage_query = textwrap.dedent(u""" MATCH (source:{resource} {{key: $query_key}}) - OPTIONAL MATCH (source)-[downstream_len:HAS_DOWNSTREAM*..{depth}]->(downstream_entity:{resource}) - OPTIONAL MATCH (source)-[upstream_len:HAS_UPSTREAM*..{depth}]->(upstream_entity:{resource}) - WITH downstream_entity, upstream_entity, downstream_len, upstream_len + OPTIONAL MATCH dpath=(source)-[downstream_len:HAS_DOWNSTREAM*..{depth}]->(downstream_entity:{resource}) + OPTIONAL MATCH upath=(source)-[upstream_len:HAS_UPSTREAM*..{depth}]->(upstream_entity:{resource}) + WITH downstream_entity, upstream_entity, downstream_len, upstream_len, upath, dpath OPTIONAL MATCH (upstream_entity)-[:HAS_BADGE]->(upstream_badge:Badge) OPTIONAL MATCH (downstream_entity)-[:HAS_BADGE]->(downstream_badge:Badge) WITH CASE WHEN downstream_badge IS NULL THEN [] ELSE collect(distinct 
{{key:downstream_badge.key,category:downstream_badge.category}}) END AS downstream_badges, CASE WHEN upstream_badge IS NULL THEN [] ELSE collect(distinct {{key:upstream_badge.key,category:upstream_badge.category}}) - END AS upstream_badges, upstream_entity, downstream_entity, upstream_len, downstream_len + END AS upstream_badges, upstream_entity, downstream_entity, upstream_len, downstream_len, upath, dpath OPTIONAL MATCH (downstream_entity:{resource})-[downstream_read:READ_BY]->(:User) - WITH upstream_entity, downstream_entity, upstream_len, downstream_len, + WITH upstream_entity, downstream_entity, upstream_len, downstream_len, upath, dpath, downstream_badges, upstream_badges, sum(downstream_read.read_count) as downstream_read_count OPTIONAL MATCH (upstream_entity:{resource})-[upstream_read:READ_BY]->(:User) WITH upstream_entity, downstream_entity, upstream_len, downstream_len, downstream_badges, upstream_badges, downstream_read_count, - sum(upstream_read.read_count) as upstream_read_count + sum(upstream_read.read_count) as upstream_read_count, upath, dpath WITH CASE WHEN upstream_len IS NULL THEN [] ELSE COLLECT(distinct{{level:SIZE(upstream_len), source:split(upstream_entity.key,'://')[0], - key:upstream_entity.key, badges:upstream_badges, usage:upstream_read_count}}) + key:upstream_entity.key, badges:upstream_badges, usage:upstream_read_count, parent:nodes(upath)[-2].key}}) END AS upstream_entities, CASE WHEN downstream_len IS NULL THEN [] ELSE COLLECT(distinct{{level:SIZE(downstream_len), source:split(downstream_entity.key,'://')[0], - key:downstream_entity.key, badges:downstream_badges, usage:downstream_read_count}}) + key:downstream_entity.key, badges:downstream_badges, usage:downstream_read_count, parent:nodes(dpath)[-2].key}}) END AS downstream_entities RETURN downstream_entities, upstream_entities """).format(depth=depth, resource=resource_type.name) get_upstream_lineage_query = textwrap.dedent(u""" MATCH (source:{resource} {{key: $query_key}}) - 
OPTIONAL MATCH (source)-[upstream_len:HAS_UPSTREAM*..{depth}]->(upstream_entity:{resource}) - WITH upstream_entity, upstream_len + OPTIONAL MATCH path=(source)-[upstream_len:HAS_UPSTREAM*..{depth}]->(upstream_entity:{resource}) + WITH upstream_entity, upstream_len, path OPTIONAL MATCH (upstream_entity)-[:HAS_BADGE]->(upstream_badge:Badge) WITH CASE WHEN upstream_badge IS NULL THEN [] ELSE collect(distinct {{key:upstream_badge.key,category:upstream_badge.category}}) - END AS upstream_badges, upstream_entity, upstream_len + END AS upstream_badges, upstream_entity, upstream_len, path OPTIONAL MATCH (upstream_entity:{resource})-[upstream_read:READ_BY]->(:User) WITH upstream_entity, upstream_len, upstream_badges, - sum(upstream_read.read_count) as upstream_read_count + sum(upstream_read.read_count) as upstream_read_count, path WITH CASE WHEN upstream_len IS NULL THEN [] ELSE COLLECT(distinct{{level:SIZE(upstream_len), source:split(upstream_entity.key,'://')[0], - key:upstream_entity.key, badges:upstream_badges, usage:upstream_read_count}}) + key:upstream_entity.key, badges:upstream_badges, usage:upstream_read_count, parent:nodes(path)[-2].key}}) END AS upstream_entities RETURN upstream_entities """).format(depth=depth, resource=resource_type.name) get_downstream_lineage_query = textwrap.dedent(u""" MATCH (source:{resource} {{key: $query_key}}) - OPTIONAL MATCH (source)-[downstream_len:HAS_DOWNSTREAM*..{depth}]->(downstream_entity:{resource}) - WITH downstream_entity, downstream_len + OPTIONAL MATCH path=(source)-[downstream_len:HAS_DOWNSTREAM*..{depth}]->(downstream_entity:{resource}) + WITH downstream_entity, downstream_len, path OPTIONAL MATCH (downstream_entity)-[:HAS_BADGE]->(downstream_badge:Badge) WITH CASE WHEN downstream_badge IS NULL THEN [] ELSE collect(distinct {{key:downstream_badge.key,category:downstream_badge.category}}) - END AS downstream_badges, downstream_entity, downstream_len + END AS downstream_badges, downstream_entity, downstream_len, path 
OPTIONAL MATCH (downstream_entity:{resource})-[downstream_read:READ_BY]->(:User) WITH downstream_entity, downstream_len, downstream_badges, - sum(downstream_read.read_count) as downstream_read_count + sum(downstream_read.read_count) as downstream_read_count, path WITH CASE WHEN downstream_len IS NULL THEN [] ELSE COLLECT(distinct{{level:SIZE(downstream_len), source:split(downstream_entity.key,'://')[0], - key:downstream_entity.key, badges:downstream_badges, usage:downstream_read_count}}) + key:downstream_entity.key, badges:downstream_badges, usage:downstream_read_count, parent:nodes(path)[-2].key}}) END AS downstream_entities RETURN downstream_entities """).format(depth=depth, resource=resource_type.name) @@ -1607,15 +1607,20 @@ def get_lineage(self, *, "source": downstream["source"], "level": downstream["level"], "badges": self._make_badges(downstream["badges"]), - "usage": downstream.get("usage", 0)})) + "usage": downstream.get("usage", 0), + "parent": downstream.get("parent", '') + })) for upstream in result.get("upstream_entities") or []: upstream_tables.append(LineageItem(**{"key": upstream["key"], "source": upstream["source"], "level": upstream["level"], "badges": self._make_badges(upstream["badges"]), - "usage": upstream.get("usage", 0)})) + "usage": upstream.get("usage", 0), + "parent": upstream.get("parent", '') + })) + # ToDo: Add a root_entity as an item, which will make it easier for lineage graph return Lineage(**{"key": id, "upstream_entities": upstream_tables, "downstream_entities": downstream_tables, diff --git a/metadata/tests/unit/proxy/test_neo4j_proxy.py b/metadata/tests/unit/proxy/test_neo4j_proxy.py index 850c5bb26e..775f53d7f1 100644 --- a/metadata/tests/unit/proxy/test_neo4j_proxy.py +++ b/metadata/tests/unit/proxy/test_neo4j_proxy.py @@ -1094,17 +1094,17 @@ def test_get_lineage_success(self) -> None: key = "alpha" mock_execute.return_value.single.side_effect = [{ "upstream_entities": [ - {"key": "beta", "source": "gold", "level": 1, 
"badges": [], "usage":100}, + {"key": "beta", "source": "gold", "level": 1, "badges": [], "usage":100, "parent": None}, {"key": "gamma", "source": "dyno", "level": 1, "badges": [ {"key": "badge1", "category": "default"}, {"key": "badge2", "category": "default"}, ], - "usage": 200}, + "usage": 200, "parent": None}, ], "downstream_entities": [ - {"key": "delta", "source": "gold", "level": 1, "badges": [], "usage": 50}, + {"key": "delta", "source": "gold", "level": 1, "badges": [], "usage": 50, "parent": None}, ] }]