From fed58012282776a6e2f4dfaa0d915224ecd5fa19 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Tue, 7 Feb 2023 15:21:10 +0100 Subject: [PATCH 01/33] add dockeroperator management --- cosmos/providers/dbt/core/operators_docker.py | 374 ++++++++++++++++++ cosmos/providers/dbt/dag.py | 2 + cosmos/providers/dbt/render.py | 7 +- cosmos/providers/dbt/task_group.py | 2 + pyproject.toml | 2 + 5 files changed, 384 insertions(+), 3 deletions(-) create mode 100644 cosmos/providers/dbt/core/operators_docker.py diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py new file mode 100644 index 000000000..82518a6e2 --- /dev/null +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -0,0 +1,374 @@ +from __future__ import annotations + +import os +import signal +from typing import Dict, Any, List, Sequence + +import yaml +from airflow.compat.functools import cached_property +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.hooks.subprocess import SubprocessHook +from airflow.providers.docker.operators.docker import DockerOperator +from airflow.utils.context import Context +from airflow.utils.operator_helpers import context_to_airflow_vars + +from cosmos.providers.dbt.core.utils.profiles_generator import ( + map_profile, +) + +class DbtDockerBaseOperator(DockerOperator): + """ + Executes a dbt core cli command. + + :param project_dir: Which directory to look in for the dbt_project.yml file. Default is the current working + directory and its parents. + :type project_dir: str + :param conn_id: The airflow connection to use as the target + :type conn_id: str + :param base_cmd: dbt sub-command to run (i.e ls, seed, run, test, etc.) + :type base_cmd: str | list[str] + :param select: dbt optional argument that specifies which nodes to include. + :type select: str + :param exclude: dbt optional argument that specifies which models to exclude. 
+ :type exclude: str + :param selector: dbt optional argument - the selector name to use, as defined in selectors.yml + :type selector: str + :param vars: dbt optional argument - Supply variables to the project. This argument overrides variables + defined in your dbt_project.yml file. This argument should be a YAML + string, eg. '{my_variable: my_value}' (templated) + :type vars: dict + :param models: dbt optional argument that specifies which nodes to include. + :type models: str + :param cache_selected_only: + :type cache_selected_only: bool + :param no_version_check: dbt optional argument - If set, skip ensuring dbt's version matches the one specified in + the dbt_project.yml file ('require-dbt-version') + :type no_version_check: bool + :param fail_fast: dbt optional argument to make dbt exit immediately if a single resource fails to build. + :type fail_fast: bool + :param quiet: dbt optional argument to show only error logs in stdout + :type quiet: bool + :param warn_error: dbt optional argument to convert dbt warnings into errors + :type warn_error: bool + :param db_name: override the target db instead of the one supplied in the airflow connection + :type db_name: str + :param schema: override the target schema instead of the one supplied in the airflow connection + :type schema: str + :param env: If env is not None, it must be a dict that defines the + environment variables for the new process; these are used instead + of inheriting the current process environment, which is the default + behavior. (templated) + :type env: dict + :param append_env: If False(default) uses the environment variables passed in env params + and does not inherit the current process environment. 
If True, inherits the environment variables + from current passes and then environment variable passed by the user will either update the existing + inherited environment variables or the new variables gets appended to it + :type append_env: bool + :param output_encoding: Output encoding of bash command + :type output_encoding: str + :param skip_exit_code: If task exits with this exit code, leave the task + in ``skipped`` state (default: 99). If set to ``None``, any non-zero + exit code will be treated as a failure. + :type skip_exit_code: int + :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. + Otherwise, the query will keep running when the task is killed. + :type cancel_query_on_kill: bool + :param dbt_executable_path: Path to dbt executable can be used with venv (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) + :type dbt_executable_path: str + """ + + template_fields: Sequence[str] = ("env", "vars") + DockerOperator.template_fields + + def __init__( + self, + project_dir: str, + conn_id: str, + base_cmd: str | List[str] = None, + select: str = None, + exclude: str = None, + selector: str = None, + vars: dict = None, + models: str = None, + cache_selected_only: bool = False, + no_version_check: bool = False, + fail_fast: bool = False, + quiet: bool = False, + warn_error: bool = False, + db_name: str = None, + schema: str = None, + env: dict = None, + append_env: bool = False, + output_encoding: str = "utf-8", + skip_exit_code: int = 99, + cancel_query_on_kill: bool = True, + dbt_executable_path: str = "dbt", + container_flags: Dict[str, Any] = {}, + **kwargs, + ) -> None: + self.project_dir = project_dir + self.conn_id = conn_id + self.base_cmd = base_cmd + self.select = select + self.exclude = exclude + self.selector = selector + self.vars = vars + self.models = models + self.cache_selected_only = cache_selected_only + self.no_version_check = no_version_check + self.fail_fast = fail_fast + 
self.quiet = quiet + self.warn_error = warn_error + self.db_name = db_name + self.schema = schema + self.env = env + self.append_env = append_env + self.output_encoding = output_encoding + self.skip_exit_code = skip_exit_code + self.cancel_query_on_kill = cancel_query_on_kill + self.dbt_executable_path = dbt_executable_path + self.container_flags = container_flags + super().__init__(**kwargs) + + @cached_property + def subprocess_hook(self): + """Returns hook for running the bash command.""" + return SubprocessHook() + + def get_env(self, context): + """Builds the set of environment variables to be exposed for the bash command.""" + system_env = os.environ.copy() + env = self.env + if env is None: + env = system_env + else: + if self.append_env: + system_env.update(env) + env = system_env + + airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True) + self.log.debug( + "Exporting the following env vars:\n%s", + "\n".join(f"{k}={v}" for k, v in airflow_context_vars.items()), + ) + env.update(airflow_context_vars) + + return env + + def exception_handling(self, result): + if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: + raise AirflowSkipException( + f"dbt command returned exit code {self.skip_exit_code}. Skipping." + ) + elif result.exit_code != 0: + raise AirflowException( + f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." 
+ ) + + def add_global_flags(self): + global_flags = [ + "project_dir", + "select", + "exclude", + "selector", + "vars", + "models", + ] + + flags = [] + for global_flag in global_flags: + dbt_name = f"--{global_flag.replace('_', '-')}" + + global_flag_value = self.container_flags.get(global_flag) + if global_flag_value is None: + global_flag_value = self.__getattribute__(global_flag) + + if global_flag_value is not None: + if isinstance(global_flag_value, dict): + # handle dict + yaml_string = yaml.dump(global_flag_value) + flags.append(dbt_name) + flags.append(yaml_string) + else: + flags.append(dbt_name) + flags.append(str(global_flag_value)) + + global_boolean_flags = [ + "no_version_check", + "cache_selected_only", + "fail_fast", + "quiet", + "warn_error", + ] + for global_boolean_flag in global_boolean_flags: + dbt_name = f"--{global_boolean_flag.replace('_', '-')}" + global_boolean_flag_value = self.__getattribute__(global_boolean_flag) + if global_boolean_flag_value is True: + flags.append(dbt_name) + return flags + + def build_cmd(self, env: dict, cmd_flags: list = None): + _, profile_vars = map_profile( + conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + ) + + # parse dbt command + dbt_cmd = [] + + ## start with the dbt executable + dbt_cmd.append(self.dbt_executable_path) + + ## add base cmd + if isinstance(self.base_cmd, str): + dbt_cmd.append(self.base_cmd) + else: + [dbt_cmd.append(item) for item in self.base_cmd] + + # add global flags + ## TODO add container_args + for item in self.add_global_flags(): + dbt_cmd.append(item) + + ## add command specific flags + if cmd_flags: + for item in cmd_flags: + dbt_cmd.append(item) + + ## add profile + dbt_cmd.append("--profiles-dir") + dbt_cmd.append("/conf") + + ## set env vars + self.environment = {**self.environment, **env, **profile_vars} + + self.command = dbt_cmd + +class DbtLSOperator(DbtDockerBaseOperator): + """ + Executes a dbt core ls command. 
+ + """ + + ui_color = "#DBCDF6" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "ls" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtSeedOperator(DbtDockerBaseOperator): + """ + Executes a dbt core seed command. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + """ + + ui_color = "#F58D7E" + + def __init__(self, full_refresh: bool = False, **kwargs) -> None: + self.full_refresh = full_refresh + super().__init__(**kwargs) + self.base_cmd = "seed" + + def add_cmd_flags(self): + flags = [] + if self.full_refresh is True: + flags.append("--full-refresh") + + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return super().execute(context) + + +class DbtRunOperator(DbtDockerBaseOperator): + """ + Executes a dbt core run command. + + """ + + ui_color = "#7352BA" + ui_fgcolor = "#F4F2FC" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "run" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtTestOperator(DbtDockerBaseOperator): + """ + Executes a dbt core test command. + + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "test" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtRunOperationOperator(DbtDockerBaseOperator): + """ + Executes a dbt core run-operation command. + + :param macro_name: name of macro to execute + :type macro_name: str + :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the + selected macro. 
+ :type args: dict + """ + + ui_color = "#8194E0" + template_fields: Sequence[str] = "args" + + def __init__(self, macro_name: str, args: dict = None, **kwargs) -> None: + self.macro_name = macro_name + self.args = args + super().__init__(**kwargs) + self.base_cmd = ["run-operation", macro_name] + + def add_cmd_flags(self): + flags = [] + if self.args is not None: + flags.append("--args") + flags.append(yaml.dump(self.args)) + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return super().execute(context) + + +class DbtDepsOperator(DbtDockerBaseOperator): + """ + Executes a dbt core deps command. + + :param vars: Supply variables to the project. This argument overrides variables defined in your dbt_project.yml file + :type vars: dict + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "deps" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) diff --git a/cosmos/providers/dbt/dag.py b/cosmos/providers/dbt/dag.py index 90b6f9b27..75198c9f9 100644 --- a/cosmos/providers/dbt/dag.py +++ b/cosmos/providers/dbt/dag.py @@ -41,6 +41,7 @@ def __init__( test_behavior: Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, + execution_mode: str = None, *args: Any, **kwargs: Any, ) -> None: @@ -61,6 +62,7 @@ def __init__( conn_id=conn_id, select=select, exclude=exclude, + execution_mode=execution_mode, ) # call the airflow DAG constructor diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 3348b3b5b..e1281c759 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -29,6 +29,7 @@ def render_project( conn_id: str = "default_conn_id", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, + 
execution_mode: str = None, ) -> Group: """ Turn a dbt project into a Group @@ -118,7 +119,7 @@ def render_project( # make the run task run_task = Task( id=f"{model_name}_run", - operator_class="cosmos.providers.dbt.core.operators.DbtRunOperator", + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtRunOperator', arguments=run_args, ) @@ -134,7 +135,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class="cosmos.providers.dbt.core.operators.DbtTestOperator", + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -166,7 +167,7 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class="cosmos.providers.dbt.core.operators.DbtTestOperator", + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', arguments=task_args, ) entities[test_task.id] = test_task diff --git a/cosmos/providers/dbt/task_group.py b/cosmos/providers/dbt/task_group.py index 3dd4aa9c3..e40fce60f 100644 --- a/cosmos/providers/dbt/task_group.py +++ b/cosmos/providers/dbt/task_group.py @@ -41,6 +41,7 @@ def __init__( test_behavior: Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, + execution_mode: str = None, *args: Any, **kwargs: Any, ) -> None: @@ -61,6 +62,7 @@ def __init__( conn_id=conn_id, select=select, exclude=exclude, + execution_mode=execution_mode, ) # call the airflow constructor diff --git a/pyproject.toml b/pyproject.toml index 8fe352e2b..f0b5f3ece 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,8 @@ classifiers = [ ] 
dependencies = [ "apache-airflow>=2.4", + "apache-airflow-providers-docker>=3.5.0", + "apache-airflow-providers-cncf-kubernetes=>5.1.1" "Jinja2>=3.0.0", "typing-extensions; python_version < '3.8'", ] From 016192f83c3db53272f3269018dc07ce4501c1ee Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Wed, 8 Feb 2023 10:39:39 +0100 Subject: [PATCH 02/33] add kubernetespodoperator management --- cosmos/providers/dbt/core/operators_docker.py | 8 +- .../dbt/core/operators_kubernetes.py | 380 ++++++++++++++++++ .../dbt/core/utils/profiles_generator.py | 2 +- cosmos/providers/dbt/render.py | 8 +- pyproject.toml | 2 +- 5 files changed, 388 insertions(+), 12 deletions(-) create mode 100644 cosmos/providers/dbt/core/operators_kubernetes.py diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 82518a6e2..bdc83897e 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -167,6 +167,7 @@ def exception_handling(self, result): def add_global_flags(self): global_flags = [ "project_dir", + "profiles_dir", "select", "exclude", "selector", @@ -224,7 +225,6 @@ def build_cmd(self, env: dict, cmd_flags: list = None): [dbt_cmd.append(item) for item in self.base_cmd] # add global flags - ## TODO add container_args for item in self.add_global_flags(): dbt_cmd.append(item) @@ -233,12 +233,8 @@ def build_cmd(self, env: dict, cmd_flags: list = None): for item in cmd_flags: dbt_cmd.append(item) - ## add profile - dbt_cmd.append("--profiles-dir") - dbt_cmd.append("/conf") - ## set env vars - self.environment = {**self.environment, **env, **profile_vars} + self.environment = {**env, **profile_vars, **self.environment} self.command = dbt_cmd diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py new file mode 100644 index 000000000..4bb2f2f52 --- /dev/null +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -0,0 +1,380 
@@ +from __future__ import annotations + +import os +import signal +from typing import Dict, Any, List, Sequence + +import yaml +from airflow.compat.functools import cached_property +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.hooks.subprocess import SubprocessHook +from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator +from airflow.utils.context import Context +from airflow.utils.operator_helpers import context_to_airflow_vars + +from cosmos.providers.dbt.core.utils.profiles_generator import ( + map_profile, +) +from airflow.providers.cncf.kubernetes.backcompat.backwards_compat_converters import ( + convert_env_vars, +) + +class DbtKubernetesBaseOperator(KubernetesPodOperator): + """ + Executes a dbt core cli command. + + :param project_dir: Which directory to look in for the dbt_project.yml file. Default is the current working + directory and its parents. + :type project_dir: str + :param conn_id: The airflow connection to use as the target + :type conn_id: str + :param base_cmd: dbt sub-command to run (i.e ls, seed, run, test, etc.) + :type base_cmd: str | list[str] + :param select: dbt optional argument that specifies which nodes to include. + :type select: str + :param exclude: dbt optional argument that specifies which models to exclude. + :type exclude: str + :param selector: dbt optional argument - the selector name to use, as defined in selectors.yml + :type selector: str + :param vars: dbt optional argument - Supply variables to the project. This argument overrides variables + defined in your dbt_project.yml file. This argument should be a YAML + string, eg. '{my_variable: my_value}' (templated) + :type vars: dict + :param models: dbt optional argument that specifies which nodes to include. 
+ :type models: str + :param cache_selected_only: + :type cache_selected_only: bool + :param no_version_check: dbt optional argument - If set, skip ensuring dbt's version matches the one specified in + the dbt_project.yml file ('require-dbt-version') + :type no_version_check: bool + :param fail_fast: dbt optional argument to make dbt exit immediately if a single resource fails to build. + :type fail_fast: bool + :param quiet: dbt optional argument to show only error logs in stdout + :type quiet: bool + :param warn_error: dbt optional argument to convert dbt warnings into errors + :type warn_error: bool + :param db_name: override the target db instead of the one supplied in the airflow connection + :type db_name: str + :param schema: override the target schema instead of the one supplied in the airflow connection + :type schema: str + :param env: If env is not None, it must be a dict that defines the + environment variables for the new process; these are used instead + of inheriting the current process environment, which is the default + behavior. (templated) + :type env: dict + :param append_env: If False(default) uses the environment variables passed in env params + and does not inherit the current process environment. If True, inherits the environment variables + from current passes and then environment variable passed by the user will either update the existing + inherited environment variables or the new variables gets appended to it + :type append_env: bool + :param output_encoding: Output encoding of bash command + :type output_encoding: str + :param skip_exit_code: If task exits with this exit code, leave the task + in ``skipped`` state (default: 99). If set to ``None``, any non-zero + exit code will be treated as a failure. + :type skip_exit_code: int + :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. + Otherwise, the query will keep running when the task is killed. 
+ :type cancel_query_on_kill: bool + :param dbt_executable_path: Path to dbt executable can be used with venv (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) + :type dbt_executable_path: str + """ + + template_fields: Sequence[str] = ("env", "vars") + KubernetesPodOperator.template_fields + + def __init__( + self, + project_dir: str, + conn_id: str, + base_cmd: str | List[str] = None, + select: str = None, + exclude: str = None, + selector: str = None, + vars: dict = None, + models: str = None, + cache_selected_only: bool = False, + no_version_check: bool = False, + fail_fast: bool = False, + quiet: bool = False, + warn_error: bool = False, + db_name: str = None, + schema: str = None, + env: dict = None, + append_env: bool = False, + output_encoding: str = "utf-8", + skip_exit_code: int = 99, + cancel_query_on_kill: bool = True, + dbt_executable_path: str = "dbt", + container_flags: Dict[str, Any] = {}, + **kwargs, + ) -> None: + self.project_dir = project_dir + self.conn_id = conn_id + self.base_cmd = base_cmd + self.select = select + self.exclude = exclude + self.selector = selector + self.vars = vars + self.models = models + self.cache_selected_only = cache_selected_only + self.no_version_check = no_version_check + self.fail_fast = fail_fast + self.quiet = quiet + self.warn_error = warn_error + self.db_name = db_name + self.schema = schema + self.env = env + self.append_env = append_env + self.output_encoding = output_encoding + self.skip_exit_code = skip_exit_code + self.cancel_query_on_kill = cancel_query_on_kill + self.dbt_executable_path = dbt_executable_path + self.container_flags = container_flags + super().__init__(**kwargs) + + @cached_property + def subprocess_hook(self): + """Returns hook for running the bash command.""" + return SubprocessHook() + + def get_env(self, context): + """Builds the set of environment variables to be exposed for the bash command.""" + system_env = os.environ.copy() + env = self.env + if env is None: + env = system_env + 
else: + if self.append_env: + system_env.update(env) + env = system_env + + airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True) + self.log.debug( + "Exporting the following env vars:\n%s", + "\n".join(f"{k}={v}" for k, v in airflow_context_vars.items()), + ) + env.update(airflow_context_vars) + + return env + + def exception_handling(self, result): + if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: + raise AirflowSkipException( + f"dbt command returned exit code {self.skip_exit_code}. Skipping." + ) + elif result.exit_code != 0: + raise AirflowException( + f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." + ) + + def add_global_flags(self): + global_flags = [ + "project_dir", + "profiles_dir", + "select", + "exclude", + "selector", + "vars", + "models", + ] + + flags = [] + for global_flag in global_flags: + dbt_name = f"--{global_flag.replace('_', '-')}" + + global_flag_value = self.container_flags.get(global_flag) + if global_flag_value is None: + global_flag_value = self.__getattribute__(global_flag) + + if global_flag_value is not None: + if isinstance(global_flag_value, dict): + # handle dict + yaml_string = yaml.dump(global_flag_value) + flags.append(dbt_name) + flags.append(yaml_string) + else: + flags.append(dbt_name) + flags.append(str(global_flag_value)) + + global_boolean_flags = [ + "no_version_check", + "cache_selected_only", + "fail_fast", + "quiet", + "warn_error", + ] + for global_boolean_flag in global_boolean_flags: + dbt_name = f"--{global_boolean_flag.replace('_', '-')}" + global_boolean_flag_value = self.__getattribute__(global_boolean_flag) + if global_boolean_flag_value is True: + flags.append(dbt_name) + return flags + + def build_env_args(self, env: dict): + env_vars_dict = {} + for env_var in self.env_vars: + env_vars_dict[env_var.name] = env_var.value + + self.env_vars = convert_env_vars({**env, **env_vars_dict}) + + def build_cmd(self, env: 
dict, cmd_flags: list = None): + _, profile_vars = map_profile( + conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + ) + + # parse dbt args + dbt_args = [] + + ## start with the dbt executable + self.cmds = [self.dbt_executable_path] + + ## add base cmd + if isinstance(self.base_cmd, str): + dbt_args.append(self.base_cmd) + else: + [dbt_args.append(item) for item in self.base_cmd] + + # add global flags + for item in self.add_global_flags(): + dbt_args.append(item) + + ## add command specific flags + if cmd_flags: + for item in cmd_flags: + dbt_args.append(item) + + ## set env vars + self.build_env_args({**env, **profile_vars}) + + self.arguments = dbt_args + +class DbtLSOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core ls command. + + """ + + ui_color = "#DBCDF6" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "ls" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtSeedOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core seed command. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + """ + + ui_color = "#F58D7E" + + def __init__(self, full_refresh: bool = False, **kwargs) -> None: + self.full_refresh = full_refresh + super().__init__(**kwargs) + self.base_cmd = "seed" + + def add_cmd_flags(self): + flags = [] + if self.full_refresh is True: + flags.append("--full-refresh") + + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return super().execute(context) + + +class DbtRunOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core run command. 
+ + """ + + ui_color = "#7352BA" + ui_fgcolor = "#F4F2FC" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "run" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtTestOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core test command. + + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "test" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) + + +class DbtRunOperationOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core run-operation command. + + :param macro_name: name of macro to execute + :type macro_name: str + :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the + selected macro. + :type args: dict + """ + + ui_color = "#8194E0" + template_fields: Sequence[str] = "args" + + def __init__(self, macro_name: str, args: dict = None, **kwargs) -> None: + self.macro_name = macro_name + self.args = args + super().__init__(**kwargs) + self.base_cmd = ["run-operation", macro_name] + + def add_cmd_flags(self): + flags = [] + if self.args is not None: + flags.append("--args") + flags.append(yaml.dump(self.args)) + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return super().execute(context) + + +class DbtDepsOperator(DbtKubernetesBaseOperator): + """ + Executes a dbt core deps command. + + :param vars: Supply variables to the project. 
This argument overrides variables defined in your dbt_project.yml file + :type vars: dict + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "deps" + + def execute(self, context: Context): + self.build_cmd(env=self.get_env(context)) + return super().execute(context) diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index 3eed5abb6..4fd7ac0c8 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -168,7 +168,7 @@ def create_profile_vars(conn: Connection, database, schema): else: logger.error( - f"Connection type {conn.type} is not yet supported.", file=sys.stderr + f"Connection type {conn.conn_type} is not yet supported.", file=sys.stderr ) sys.exit(1) diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index e1281c759..99ca5a452 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -42,7 +42,7 @@ def render_project( :param emit_datasets: If enabled test nodes emit Airflow Datasets for downstream cross-DAG dependencies :param conn_id: The Airflow connection ID to use in Airflow Datasets :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) - :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2]}}) + :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2]}) """ # first, get the dbt project project = DbtProject( @@ -119,7 +119,7 @@ def render_project( # make the run task run_task = Task( id=f"{model_name}_run", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtRunOperator', + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == 
"kubernetes" else ""}.DbtRunOperator', arguments=run_args, ) @@ -135,7 +135,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -167,7 +167,7 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', + operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', arguments=task_args, ) entities[test_task.id] = test_task diff --git a/pyproject.toml b/pyproject.toml index f0b5f3ece..f42ddc93a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ classifiers = [ dependencies = [ "apache-airflow>=2.4", "apache-airflow-providers-docker>=3.5.0", - "apache-airflow-providers-cncf-kubernetes=>5.1.1" + "apache-airflow-providers-cncf-kubernetes>=5.1.1", "Jinja2>=3.0.0", "typing-extensions; python_version < '3.8'", ] From 03d51f47c585876bd2a1b0179af10671a8bcfe7b Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Wed, 8 Feb 2023 10:43:57 +0100 Subject: [PATCH 03/33] normalize docs --- cosmos/providers/dbt/core/operators_docker.py | 3 ++- cosmos/providers/dbt/core/operators_kubernetes.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index bdc83897e..8031ed47e 100644 --- a/cosmos/providers/dbt/core/operators_docker.py 
+++ b/cosmos/providers/dbt/core/operators_docker.py @@ -73,7 +73,8 @@ class DbtDockerBaseOperator(DockerOperator): :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. Otherwise, the query will keep running when the task is killed. :type cancel_query_on_kill: bool - :param dbt_executable_path: Path to dbt executable can be used with venv (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) + :param dbt_executable_path: Path to dbt executable can be used with venv + (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) :type dbt_executable_path: str """ diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 4bb2f2f52..43527e5df 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -76,7 +76,8 @@ class DbtKubernetesBaseOperator(KubernetesPodOperator): :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. Otherwise, the query will keep running when the task is killed. :type cancel_query_on_kill: bool - :param dbt_executable_path: Path to dbt executable can be used with venv (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) + :param dbt_executable_path: Path to dbt executable can be used with venv + (i.e. 
/home/astro/.pyenv/versions/dbt_venv/bin/dbt) :type dbt_executable_path: str """ From f1d8ae89840e82ec522f2dc3e290da45243d6afd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:00:34 +0000 Subject: [PATCH 04/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/core/operators_docker.py | 15 ++++---- .../dbt/core/operators_kubernetes.py | 34 +++++++++++-------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 8031ed47e..fcceb274b 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -1,8 +1,7 @@ from __future__ import annotations import os -import signal -from typing import Dict, Any, List, Sequence +from typing import Any, Dict, List, Sequence import yaml from airflow.compat.functools import cached_property @@ -12,9 +11,8 @@ from airflow.utils.context import Context from airflow.utils.operator_helpers import context_to_airflow_vars -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, -) +from cosmos.providers.dbt.core.utils.profiles_generator import map_profile + class DbtDockerBaseOperator(DockerOperator): """ @@ -179,11 +177,11 @@ def add_global_flags(self): flags = [] for global_flag in global_flags: dbt_name = f"--{global_flag.replace('_', '-')}" - + global_flag_value = self.container_flags.get(global_flag) if global_flag_value is None: global_flag_value = self.__getattribute__(global_flag) - + if global_flag_value is not None: if isinstance(global_flag_value, dict): # handle dict @@ -236,9 +234,10 @@ def build_cmd(self, env: dict, cmd_flags: list = None): ## set env vars self.environment = {**env, **profile_vars, 
**self.environment} - + self.command = dbt_cmd + class DbtLSOperator(DbtDockerBaseOperator): """ Executes a dbt core ls command. diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 43527e5df..87ee468f6 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -1,23 +1,23 @@ from __future__ import annotations import os -import signal -from typing import Dict, Any, List, Sequence +from typing import Any, Dict, List, Sequence import yaml from airflow.compat.functools import cached_property from airflow.exceptions import AirflowException, AirflowSkipException from airflow.hooks.subprocess import SubprocessHook -from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator -from airflow.utils.context import Context -from airflow.utils.operator_helpers import context_to_airflow_vars - -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, -) from airflow.providers.cncf.kubernetes.backcompat.backwards_compat_converters import ( convert_env_vars, ) +from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import ( + KubernetesPodOperator, +) +from airflow.utils.context import Context +from airflow.utils.operator_helpers import context_to_airflow_vars + +from cosmos.providers.dbt.core.utils.profiles_generator import map_profile + class DbtKubernetesBaseOperator(KubernetesPodOperator): """ @@ -81,7 +81,10 @@ class DbtKubernetesBaseOperator(KubernetesPodOperator): :type dbt_executable_path: str """ - template_fields: Sequence[str] = ("env", "vars") + KubernetesPodOperator.template_fields + template_fields: Sequence[str] = ( + "env", + "vars", + ) + KubernetesPodOperator.template_fields def __init__( self, @@ -182,11 +185,11 @@ def add_global_flags(self): flags = [] for global_flag in global_flags: dbt_name = f"--{global_flag.replace('_', '-')}" - + global_flag_value = 
self.container_flags.get(global_flag) if global_flag_value is None: global_flag_value = self.__getattribute__(global_flag) - + if global_flag_value is not None: if isinstance(global_flag_value, dict): # handle dict @@ -210,12 +213,12 @@ def add_global_flags(self): if global_boolean_flag_value is True: flags.append(dbt_name) return flags - + def build_env_args(self, env: dict): env_vars_dict = {} for env_var in self.env_vars: env_vars_dict[env_var.name] = env_var.value - + self.env_vars = convert_env_vars({**env, **env_vars_dict}) def build_cmd(self, env: dict, cmd_flags: list = None): @@ -246,9 +249,10 @@ def build_cmd(self, env: dict, cmd_flags: list = None): ## set env vars self.build_env_args({**env, **profile_vars}) - + self.arguments = dbt_args + class DbtLSOperator(DbtKubernetesBaseOperator): """ Executes a dbt core ls command. From 2ffe266d11b042fb8e7f536775585e65e40536c5 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Wed, 8 Feb 2023 11:04:06 +0100 Subject: [PATCH 05/33] fix lines too long --- cosmos/providers/dbt/render.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 525862856..9b1cb6145 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -67,6 +67,12 @@ def render_project( f"{set(select['tags']).intersection(exclude['tags'])}" ) + operator_class = "" + if execution_mode == "docker": + operator_class = "_docker" + elif execution_mode == "kubernetes": + operator_class = "_kubernetes" + if "paths" in select and "paths" in exclude: if set(select["paths"]).intersection(exclude["paths"]): raise AirflowException( @@ -115,7 +121,7 @@ def render_project( # make the run task run_task = Task( id=f"{model_name}_run", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == "kubernetes" else ""}.DbtRunOperator', + 
operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtRunOperator', arguments=run_args, ) @@ -131,7 +137,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', + operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator', upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -163,7 +169,7 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{"_docker" if execution_mode == "docker" else "_kubernetes" if execution_mode == "kubernetes" else ""}.DbtTestOperator', + operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator', arguments=task_args, ) entities[test_task.id] = test_task From 116a6236b56f18664f54ab07d20077d835af0922 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:04:52 +0000 Subject: [PATCH 06/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/render.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 9b1cb6145..34ba0aab9 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -71,7 +71,7 @@ def render_project( if execution_mode == "docker": operator_class = "_docker" elif execution_mode == "kubernetes": - operator_class = "_kubernetes" + operator_class = "_kubernetes" if "paths" in select and "paths" in exclude: if set(select["paths"]).intersection(exclude["paths"]): @@ -121,7 +121,7 @@ def render_project( # make the run task run_task 
= Task( id=f"{model_name}_run", - operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtRunOperator', + operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtRunOperator", arguments=run_args, ) @@ -137,7 +137,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator', + operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator", upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -169,7 +169,7 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class=f'cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator', + operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator", arguments=task_args, ) entities[test_task.id] = test_task From e833069f7cbcc2cdae1f477b5dd644fbbf56554d Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Thu, 9 Feb 2023 08:46:02 +0100 Subject: [PATCH 07/33] resolve conflict with main --- cosmos/providers/dbt/core/utils/profiles_generator.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index f85512df4..e81260f3d 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -161,14 +161,7 @@ def create_profile_vars_databricks( if conn.password: token = conn.password else: -<<<<<<< HEAD - logger.error( - f"Connection type {conn.conn_type} is not yet supported.", file=sys.stderr - ) - sys.exit(1) -======= token = conn.extra_dejson.get("token") ->>>>>>> main profile_vars = { "DATABRICKS_HOST": str(conn.host).replace( From ce46043d212d72cf118b6e86593fa290dabc3310 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 10 Feb 2023 10:12:06 +0100 Subject: [PATCH 08/33] Makes conn_id optional for k8s and docker --- 
cosmos/providers/dbt/core/operators_docker.py | 14 +++++++++----- cosmos/providers/dbt/core/operators_kubernetes.py | 14 +++++++++----- .../providers/dbt/core/utils/profiles_generator.py | 8 ++++++++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index fcceb274b..2f3abb3e8 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -11,8 +11,10 @@ from airflow.utils.context import Context from airflow.utils.operator_helpers import context_to_airflow_vars -from cosmos.providers.dbt.core.utils.profiles_generator import map_profile - +from cosmos.providers.dbt.core.utils.profiles_generator import ( + map_profile, + conn_exists +) class DbtDockerBaseOperator(DockerOperator): """ @@ -207,9 +209,11 @@ def add_global_flags(self): return flags def build_cmd(self, env: dict, cmd_flags: list = None): - _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema - ) + profile_vars = {} + if (conn_exists(conn_id=self.conn_id)): + _, profile_vars = map_profile( + conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + ) # parse dbt command dbt_cmd = [] diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 87ee468f6..ca4dd9c85 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -16,8 +16,10 @@ from airflow.utils.context import Context from airflow.utils.operator_helpers import context_to_airflow_vars -from cosmos.providers.dbt.core.utils.profiles_generator import map_profile - +from cosmos.providers.dbt.core.utils.profiles_generator import ( + map_profile, + conn_exists +) class DbtKubernetesBaseOperator(KubernetesPodOperator): """ @@ -222,9 +224,11 @@ def build_env_args(self, env: dict): self.env_vars = 
convert_env_vars({**env, **env_vars_dict}) def build_cmd(self, env: dict, cmd_flags: list = None): - _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema - ) + profile_vars = {} + if (conn_exists(conn_id=self.conn_id)): + _, profile_vars = map_profile( + conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + ) # parse dbt args dbt_args = [] diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index e81260f3d..ef61aff2e 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -10,6 +10,7 @@ import yaml from airflow.hooks.base import BaseHook from airflow.models.connection import Connection +from airflow.exceptions import AirflowNotFoundException from cosmos.providers.dbt.core.profiles.bigquery import bigquery_profile from cosmos.providers.dbt.core.profiles.databricks import databricks_profile @@ -254,3 +255,10 @@ def map_profile( sys.exit(1) return profile_vars_func(conn, database=db, schema=schema) + +def conn_exists(conn_id: str) -> bool: + try: + BaseHook().get_connection(conn_id) + return True + except AirflowNotFoundException: + return False \ No newline at end of file From 76a641ab0d8b9f489f95c3d8ef8fb055458d1b7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 10 Feb 2023 09:12:36 +0000 Subject: [PATCH 09/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/core/operators_docker.py | 12 ++++++------ cosmos/providers/dbt/core/operators_kubernetes.py | 12 ++++++------ .../providers/dbt/core/utils/profiles_generator.py | 5 +++-- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git 
a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 2f3abb3e8..f9b6f286c 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -11,10 +11,8 @@ from airflow.utils.context import Context from airflow.utils.operator_helpers import context_to_airflow_vars -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, - conn_exists -) +from cosmos.providers.dbt.core.utils.profiles_generator import conn_exists, map_profile + class DbtDockerBaseOperator(DockerOperator): """ @@ -210,9 +208,11 @@ def add_global_flags(self): def build_cmd(self, env: dict, cmd_flags: list = None): profile_vars = {} - if (conn_exists(conn_id=self.conn_id)): + if conn_exists(conn_id=self.conn_id): _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + conn_id=self.conn_id, + db_override=self.db_name, + schema_override=self.schema, ) # parse dbt command diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index ca4dd9c85..ab98fff86 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -16,10 +16,8 @@ from airflow.utils.context import Context from airflow.utils.operator_helpers import context_to_airflow_vars -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, - conn_exists -) +from cosmos.providers.dbt.core.utils.profiles_generator import conn_exists, map_profile + class DbtKubernetesBaseOperator(KubernetesPodOperator): """ @@ -225,9 +223,11 @@ def build_env_args(self, env: dict): def build_cmd(self, env: dict, cmd_flags: list = None): profile_vars = {} - if (conn_exists(conn_id=self.conn_id)): + if conn_exists(conn_id=self.conn_id): _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + 
conn_id=self.conn_id, + db_override=self.db_name, + schema_override=self.schema, ) # parse dbt args diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index ef61aff2e..b83741ea7 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -8,9 +8,9 @@ import pkg_resources import yaml +from airflow.exceptions import AirflowNotFoundException from airflow.hooks.base import BaseHook from airflow.models.connection import Connection -from airflow.exceptions import AirflowNotFoundException from cosmos.providers.dbt.core.profiles.bigquery import bigquery_profile from cosmos.providers.dbt.core.profiles.databricks import databricks_profile @@ -256,9 +256,10 @@ def map_profile( return profile_vars_func(conn, database=db, schema=schema) + def conn_exists(conn_id: str) -> bool: try: BaseHook().get_connection(conn_id) return True except AirflowNotFoundException: - return False \ No newline at end of file + return False From e3152cf9cf58300e42268f620e1f124a90754512 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Mon, 13 Feb 2023 10:31:04 +0100 Subject: [PATCH 10/33] New class to make the code as generic as possible --- cosmos/providers/dbt/__init__.py | 48 +++- cosmos/providers/dbt/core/operators.py | 236 ++-------------- cosmos/providers/dbt/core/operators_docker.py | 245 ++-------------- .../dbt/core/operators_kubernetes.py | 261 ++---------------- cosmos/providers/dbt/core/operators_local.py | 217 +++++++++++++++ cosmos/providers/dbt/render.py | 24 +- 6 files changed, 344 insertions(+), 687 deletions(-) create mode 100644 cosmos/providers/dbt/core/operators_local.py diff --git a/cosmos/providers/dbt/__init__.py b/cosmos/providers/dbt/__init__.py index 292679650..92435e3b1 100644 --- a/cosmos/providers/dbt/__init__.py +++ b/cosmos/providers/dbt/__init__.py @@ -3,12 +3,28 @@ """ # re-export the operators -from .core.operators import ( - 
DbtLSOperator, - DbtRunOperationOperator, - DbtRunOperator, - DbtSeedOperator, - DbtTestOperator, +from .core.operators_local import ( + DbtLSLocalOperator, + DbtRunOperationLocalOperator, + DbtRunLocalOperator, + DbtSeedLocalOperator, + DbtTestLocalOperator, +) + +from .core.operators_docker import ( + DbtLSDockerOperator, + DbtRunOperationDockerOperator, + DbtRunDockerOperator, + DbtSeedDockerOperator, + DbtTestDockerOperator, +) + +from .core.operators_kubernetes import ( + DbtLSKubernetesOperator, + DbtRunOperationKubernetesOperator, + DbtRunKubernetesOperator, + DbtSeedKubernetesOperator, + DbtTestKubernetesOperator, ) # re-export user facing utilities @@ -19,11 +35,21 @@ from .task_group import DbtTaskGroup __all__ = [ - DbtLSOperator, - DbtRunOperationOperator, - DbtRunOperator, - DbtSeedOperator, - DbtTestOperator, + DbtLSLocalOperator, + DbtRunOperationLocalOperator, + DbtRunLocalOperator, + DbtSeedLocalOperator, + DbtTestLocalOperator, + DbtLSDockerOperator, + DbtRunOperationDockerOperator, + DbtRunDockerOperator, + DbtSeedDockerOperator, + DbtTestDockerOperator, + DbtLSKubernetesOperator, + DbtRunOperationKubernetesOperator, + DbtRunKubernetesOperator, + DbtSeedKubernetesOperator, + DbtTestKubernetesOperator, get_dbt_dataset, DbtDag, DbtTaskGroup, diff --git a/cosmos/providers/dbt/core/operators.py b/cosmos/providers/dbt/core/operators.py index 3ae333e3c..1cbce128a 100644 --- a/cosmos/providers/dbt/core/operators.py +++ b/cosmos/providers/dbt/core/operators.py @@ -1,15 +1,10 @@ from __future__ import annotations import os -import signal -from typing import List, Sequence +from typing import List, Dict, Any, Sequence import yaml -from airflow.compat.functools import cached_property -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.hooks.subprocess import SubprocessHook from airflow.models.baseoperator import BaseOperator -from airflow.utils.context import Context from airflow.utils.operator_helpers import 
context_to_airflow_vars from cosmos.providers.dbt.core.utils.profiles_generator import ( @@ -78,6 +73,8 @@ class DbtBaseOperator(BaseOperator): :param dbt_executable_path: Path to dbt executable can be used with venv (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) :type dbt_executable_path: str + :param dbt_cmd_flags: Flags passed to dbt command override those that are calculated. + :type dbt_cmd_flags: dict """ template_fields: Sequence[str] = ("env", "vars") @@ -105,6 +102,7 @@ def __init__( skip_exit_code: int = 99, cancel_query_on_kill: bool = True, dbt_executable_path: str = "dbt", + dbt_cmd_flags: Dict[str, Any] = {}, **kwargs, ) -> None: self.project_dir = project_dir @@ -128,13 +126,9 @@ def __init__( self.skip_exit_code = skip_exit_code self.cancel_query_on_kill = cancel_query_on_kill self.dbt_executable_path = dbt_executable_path + self.dbt_cmd_flags = dbt_cmd_flags super().__init__(**kwargs) - @cached_property - def subprocess_hook(self): - """Returns hook for running the bash command.""" - return SubprocessHook() - def get_env(self, context): """Builds the set of environment variables to be exposed for the bash command.""" system_env = os.environ.copy() @@ -155,20 +149,11 @@ def get_env(self, context): return env - def exception_handling(self, result): - if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: - raise AirflowSkipException( - f"dbt command returned exit code {self.skip_exit_code}. Skipping." - ) - elif result.exit_code != 0: - raise AirflowException( - f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." 
- ) - def add_global_flags(self): global_flags = [ "project_dir", + "profiles_dir", "select", "exclude", "selector", @@ -179,7 +164,11 @@ def add_global_flags(self): flags = [] for global_flag in global_flags: dbt_name = f"--{global_flag.replace('_', '-')}" - global_flag_value = self.__getattribute__(global_flag) + + global_flag_value = self.dbt_cmd_flags.get(global_flag) + if global_flag_value is None: + global_flag_value = self.__getattribute__(global_flag) + if global_flag_value is not None: if isinstance(global_flag_value, dict): # handle dict @@ -199,39 +188,16 @@ def add_global_flags(self): ] for global_boolean_flag in global_boolean_flags: dbt_name = f"--{global_boolean_flag.replace('_', '-')}" - global_boolean_flag_value = self.__getattribute__(global_boolean_flag) + + global_boolean_flag_value = self.dbt_cmd_flags.get(global_boolean_flag) + if global_boolean_flag_value is None: + global_boolean_flag_value = self.__getattribute__(global_boolean_flag) + if global_boolean_flag_value is True: flags.append(dbt_name) return flags - def run_command(self, cmd, env): - # check project_dir - if self.project_dir is not None: - if not os.path.exists(self.project_dir): - raise AirflowException( - f"Can not find the project_dir: {self.project_dir}" - ) - if not os.path.isdir(self.project_dir): - raise AirflowException( - f"The project_dir {self.project_dir} must be a directory" - ) - - # run bash command - result = self.subprocess_hook.run_command( - command=cmd, - env=env, - output_encoding=self.output_encoding, - cwd=self.project_dir, - ) - self.exception_handling(result) - return result - - def build_and_run_cmd(self, env: dict, cmd_flags: list = None): - create_default_profiles() - profile, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema - ) - + def build_cmd(self, env: dict, cmd_flags: list = None, handle_profile: bool = True): # parse dbt command dbt_cmd = [] @@ -254,158 +220,16 @@ def 
build_and_run_cmd(self, env: dict, cmd_flags: list = None): dbt_cmd.append(item) ## add profile - dbt_cmd.append("--profile") - dbt_cmd.append(profile) - - ## set env vars - env = {**env, **profile_vars} - result = self.run_command(cmd=dbt_cmd, env=env) - return result - - def execute(self, context: Context): - result = self.build_and_run_cmd(env=self.get_env(context)) - return result.output - - def on_kill(self) -> None: - if self.cancel_query_on_kill: - self.subprocess_hook.log.info("Sending SIGINT signal to process group") - if self.subprocess_hook.sub_process and hasattr( - self.subprocess_hook.sub_process, "pid" - ): - os.killpg( - os.getpgid(self.subprocess_hook.sub_process.pid), signal.SIGINT - ) - else: - self.subprocess_hook.send_sigterm() - - -class DbtLSOperator(DbtBaseOperator): - """ - Executes a dbt core ls command. - - """ - - ui_color = "#DBCDF6" - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.base_cmd = "ls" - - def execute(self, context: Context): - result = self.build_and_run_cmd(env=self.get_env(context)) - return result.output - - -class DbtSeedOperator(DbtBaseOperator): - """ - Executes a dbt core seed command. - - :param full_refresh: dbt optional arg - dbt will treat incremental models as table models - - """ - - ui_color = "#F58D7E" - - def __init__(self, full_refresh: bool = False, **kwargs) -> None: - self.full_refresh = full_refresh - super().__init__(**kwargs) - self.base_cmd = "seed" - - def add_cmd_flags(self): - flags = [] - if self.full_refresh is True: - flags.append("--full-refresh") - - return flags - - def execute(self, context: Context): - cmd_flags = self.add_cmd_flags() - result = self.build_and_run_cmd(env=self.get_env(context), cmd_flags=cmd_flags) - return result.output - - -class DbtRunOperator(DbtBaseOperator): - """ - Executes a dbt core run command. 
- - """ - - ui_color = "#7352BA" - ui_fgcolor = "#F4F2FC" - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.base_cmd = "run" - - def execute(self, context: Context): - result = self.build_and_run_cmd(env=self.get_env(context)) - return result.output - - -class DbtTestOperator(DbtBaseOperator): - """ - Executes a dbt core test command. - - """ - - ui_color = "#8194E0" - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.base_cmd = "test" - - def execute(self, context: Context): - result = self.build_and_run_cmd(env=self.get_env(context)) - return result.output - - -class DbtRunOperationOperator(DbtBaseOperator): - """ - Executes a dbt core run-operation command. - - :param macro_name: name of macro to execute - :type macro_name: str - :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the - selected macro. - :type args: dict - """ - - ui_color = "#8194E0" - template_fields: Sequence[str] = "args" - - def __init__(self, macro_name: str, args: dict = None, **kwargs) -> None: - self.macro_name = macro_name - self.args = args - super().__init__(**kwargs) - self.base_cmd = ["run-operation", macro_name] - - def add_cmd_flags(self): - flags = [] - if self.args is not None: - flags.append("--args") - flags.append(yaml.dump(self.args)) - return flags - - def execute(self, context: Context): - cmd_flags = self.add_cmd_flags() - result = self.build_and_run_cmd(env=self.get_env(context), cmd_flags=cmd_flags) - return result.output - - -class DbtDepsOperator(DbtBaseOperator): - """ - Executes a dbt core deps command. - - :param vars: Supply variables to the project. 
This argument overrides variables defined in your dbt_project.yml file - :type vars: dict - """ - - ui_color = "#8194E0" - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.base_cmd = "deps" - - def execute(self, context: Context): - result = self.build_and_run_cmd(env=self.get_env(context)) - return result.output + if handle_profile: + create_default_profiles() + profile, profile_vars = map_profile( + conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + ) + + dbt_cmd.append("--profile") + dbt_cmd.append(profile) + + ## set env vars + env = {**env, **profile_vars} + + return dbt_cmd, env \ No newline at end of file diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 2f3abb3e8..b1d6b11e9 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -1,248 +1,41 @@ from __future__ import annotations -import os -from typing import Any, Dict, List, Sequence +from typing import Sequence import yaml -from airflow.compat.functools import cached_property -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.hooks.subprocess import SubprocessHook from airflow.providers.docker.operators.docker import DockerOperator from airflow.utils.context import Context -from airflow.utils.operator_helpers import context_to_airflow_vars +from cosmos.providers.dbt.core.operators import DbtBaseOperator from cosmos.providers.dbt.core.utils.profiles_generator import ( map_profile, conn_exists ) -class DbtDockerBaseOperator(DockerOperator): +class DbtDockerBaseOperator(DockerOperator, DbtBaseOperator): """ - Executes a dbt core cli command. - - :param project_dir: Which directory to look in for the dbt_project.yml file. Default is the current working - directory and its parents. 
- :type project_dir: str - :param conn_id: The airflow connection to use as the target - :type conn_id: str - :param base_cmd: dbt sub-command to run (i.e ls, seed, run, test, etc.) - :type base_cmd: str | list[str] - :param select: dbt optional argument that specifies which nodes to include. - :type select: str - :param exclude: dbt optional argument that specifies which models to exclude. - :type exclude: str - :param selector: dbt optional argument - the selector name to use, as defined in selectors.yml - :type selector: str - :param vars: dbt optional argument - Supply variables to the project. This argument overrides variables - defined in your dbt_project.yml file. This argument should be a YAML - string, eg. '{my_variable: my_value}' (templated) - :type vars: dict - :param models: dbt optional argument that specifies which nodes to include. - :type models: str - :param cache_selected_only: - :type cache_selected_only: bool - :param no_version_check: dbt optional argument - If set, skip ensuring dbt's version matches the one specified in - the dbt_project.yml file ('require-dbt-version') - :type no_version_check: bool - :param fail_fast: dbt optional argument to make dbt exit immediately if a single resource fails to build. - :type fail_fast: bool - :param quiet: dbt optional argument to show only error logs in stdout - :type quiet: bool - :param warn_error: dbt optional argument to convert dbt warnings into errors - :type warn_error: bool - :param db_name: override the target db instead of the one supplied in the airflow connection - :type db_name: str - :param schema: override the target schema instead of the one supplied in the airflow connection - :type schema: str - :param env: If env is not None, it must be a dict that defines the - environment variables for the new process; these are used instead - of inheriting the current process environment, which is the default - behavior. 
(templated) - :type env: dict - :param append_env: If False(default) uses the environment variables passed in env params - and does not inherit the current process environment. If True, inherits the environment variables - from current passes and then environment variable passed by the user will either update the existing - inherited environment variables or the new variables gets appended to it - :type append_env: bool - :param output_encoding: Output encoding of bash command - :type output_encoding: str - :param skip_exit_code: If task exits with this exit code, leave the task - in ``skipped`` state (default: 99). If set to ``None``, any non-zero - exit code will be treated as a failure. - :type skip_exit_code: int - :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. - Otherwise, the query will keep running when the task is killed. - :type cancel_query_on_kill: bool - :param dbt_executable_path: Path to dbt executable can be used with venv - (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) - :type dbt_executable_path: str + Executes a dbt core cli command in a Docker container. 
+ """ - template_fields: Sequence[str] = ("env", "vars") + DockerOperator.template_fields + template_fields: Sequence[str] = DbtBaseOperator.template_fields + DockerOperator.template_fields def __init__( self, - project_dir: str, - conn_id: str, - base_cmd: str | List[str] = None, - select: str = None, - exclude: str = None, - selector: str = None, - vars: dict = None, - models: str = None, - cache_selected_only: bool = False, - no_version_check: bool = False, - fail_fast: bool = False, - quiet: bool = False, - warn_error: bool = False, - db_name: str = None, - schema: str = None, - env: dict = None, - append_env: bool = False, - output_encoding: str = "utf-8", - skip_exit_code: int = 99, - cancel_query_on_kill: bool = True, - dbt_executable_path: str = "dbt", - container_flags: Dict[str, Any] = {}, **kwargs, ) -> None: - self.project_dir = project_dir - self.conn_id = conn_id - self.base_cmd = base_cmd - self.select = select - self.exclude = exclude - self.selector = selector - self.vars = vars - self.models = models - self.cache_selected_only = cache_selected_only - self.no_version_check = no_version_check - self.fail_fast = fail_fast - self.quiet = quiet - self.warn_error = warn_error - self.db_name = db_name - self.schema = schema - self.env = env - self.append_env = append_env - self.output_encoding = output_encoding - self.skip_exit_code = skip_exit_code - self.cancel_query_on_kill = cancel_query_on_kill - self.dbt_executable_path = dbt_executable_path - self.container_flags = container_flags super().__init__(**kwargs) - - @cached_property - def subprocess_hook(self): - """Returns hook for running the bash command.""" - return SubprocessHook() - - def get_env(self, context): - """Builds the set of environment variables to be exposed for the bash command.""" - system_env = os.environ.copy() - env = self.env - if env is None: - env = system_env - else: - if self.append_env: - system_env.update(env) - env = system_env - - airflow_context_vars = 
context_to_airflow_vars(context, in_env_var_format=True) - self.log.debug( - "Exporting the following env vars:\n%s", - "\n".join(f"{k}={v}" for k, v in airflow_context_vars.items()), - ) - env.update(airflow_context_vars) - - return env - - def exception_handling(self, result): - if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: - raise AirflowSkipException( - f"dbt command returned exit code {self.skip_exit_code}. Skipping." - ) - elif result.exit_code != 0: - raise AirflowException( - f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." - ) - - def add_global_flags(self): - global_flags = [ - "project_dir", - "profiles_dir", - "select", - "exclude", - "selector", - "vars", - "models", - ] - - flags = [] - for global_flag in global_flags: - dbt_name = f"--{global_flag.replace('_', '-')}" - - global_flag_value = self.container_flags.get(global_flag) - if global_flag_value is None: - global_flag_value = self.__getattribute__(global_flag) - - if global_flag_value is not None: - if isinstance(global_flag_value, dict): - # handle dict - yaml_string = yaml.dump(global_flag_value) - flags.append(dbt_name) - flags.append(yaml_string) - else: - flags.append(dbt_name) - flags.append(str(global_flag_value)) - - global_boolean_flags = [ - "no_version_check", - "cache_selected_only", - "fail_fast", - "quiet", - "warn_error", - ] - for global_boolean_flag in global_boolean_flags: - dbt_name = f"--{global_boolean_flag.replace('_', '-')}" - global_boolean_flag_value = self.__getattribute__(global_boolean_flag) - if global_boolean_flag_value is True: - flags.append(dbt_name) - return flags - - def build_cmd(self, env: dict, cmd_flags: list = None): - profile_vars = {} - if (conn_exists(conn_id=self.conn_id)): - _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema - ) - - # parse dbt command - dbt_cmd = [] - - ## start with the dbt executable - 
dbt_cmd.append(self.dbt_executable_path) - - ## add base cmd - if isinstance(self.base_cmd, str): - dbt_cmd.append(self.base_cmd) - else: - [dbt_cmd.append(item) for item in self.base_cmd] - - # add global flags - for item in self.add_global_flags(): - dbt_cmd.append(item) - - ## add command specific flags - if cmd_flags: - for item in cmd_flags: - dbt_cmd.append(item) - + + + def build_cmd_and_args(self, env: dict, cmd_flags: list = None): + dbt_cmd, env_vars = self.build_cmd(env=env, cmd_flags=cmd_flags, handle_profile=False) + ## set env vars - self.environment = {**env, **profile_vars, **self.environment} + self.environment = {**env_vars, **self.environment} self.command = dbt_cmd - - -class DbtLSOperator(DbtDockerBaseOperator): + +class DbtLSDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core ls command. @@ -259,7 +52,7 @@ def execute(self, context: Context): return super().execute(context) -class DbtSeedOperator(DbtDockerBaseOperator): +class DbtSeedDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core seed command. @@ -287,7 +80,7 @@ def execute(self, context: Context): return super().execute(context) -class DbtRunOperator(DbtDockerBaseOperator): +class DbtRunDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core run command. @@ -305,7 +98,7 @@ def execute(self, context: Context): return super().execute(context) -class DbtTestOperator(DbtDockerBaseOperator): +class DbtTestDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core test command. @@ -322,7 +115,7 @@ def execute(self, context: Context): return super().execute(context) -class DbtRunOperationOperator(DbtDockerBaseOperator): +class DbtRunOperationDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core run-operation command. @@ -355,7 +148,7 @@ def execute(self, context: Context): return super().execute(context) -class DbtDepsOperator(DbtDockerBaseOperator): +class DbtDepsDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core deps command. 
diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index ca4dd9c85..c058b2f54 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -1,12 +1,8 @@ from __future__ import annotations -import os -from typing import Any, Dict, List, Sequence +from typing import Sequence import yaml -from airflow.compat.functools import cached_property -from airflow.exceptions import AirflowException, AirflowSkipException -from airflow.hooks.subprocess import SubprocessHook from airflow.providers.cncf.kubernetes.backcompat.backwards_compat_converters import ( convert_env_vars, ) @@ -14,208 +10,28 @@ KubernetesPodOperator, ) from airflow.utils.context import Context -from airflow.utils.operator_helpers import context_to_airflow_vars +from cosmos.providers.dbt.core.operators import DbtBaseOperator from cosmos.providers.dbt.core.utils.profiles_generator import ( map_profile, conn_exists ) -class DbtKubernetesBaseOperator(KubernetesPodOperator): + +class DbtKubernetesBaseOperator(KubernetesPodOperator, DbtBaseOperator): """ - Executes a dbt core cli command. - - :param project_dir: Which directory to look in for the dbt_project.yml file. Default is the current working - directory and its parents. - :type project_dir: str - :param conn_id: The airflow connection to use as the target - :type conn_id: str - :param base_cmd: dbt sub-command to run (i.e ls, seed, run, test, etc.) - :type base_cmd: str | list[str] - :param select: dbt optional argument that specifies which nodes to include. - :type select: str - :param exclude: dbt optional argument that specifies which models to exclude. - :type exclude: str - :param selector: dbt optional argument - the selector name to use, as defined in selectors.yml - :type selector: str - :param vars: dbt optional argument - Supply variables to the project. 
This argument overrides variables - defined in your dbt_project.yml file. This argument should be a YAML - string, eg. '{my_variable: my_value}' (templated) - :type vars: dict - :param models: dbt optional argument that specifies which nodes to include. - :type models: str - :param cache_selected_only: - :type cache_selected_only: bool - :param no_version_check: dbt optional argument - If set, skip ensuring dbt's version matches the one specified in - the dbt_project.yml file ('require-dbt-version') - :type no_version_check: bool - :param fail_fast: dbt optional argument to make dbt exit immediately if a single resource fails to build. - :type fail_fast: bool - :param quiet: dbt optional argument to show only error logs in stdout - :type quiet: bool - :param warn_error: dbt optional argument to convert dbt warnings into errors - :type warn_error: bool - :param db_name: override the target db instead of the one supplied in the airflow connection - :type db_name: str - :param schema: override the target schema instead of the one supplied in the airflow connection - :type schema: str - :param env: If env is not None, it must be a dict that defines the - environment variables for the new process; these are used instead - of inheriting the current process environment, which is the default - behavior. (templated) - :type env: dict - :param append_env: If False(default) uses the environment variables passed in env params - and does not inherit the current process environment. If True, inherits the environment variables - from current passes and then environment variable passed by the user will either update the existing - inherited environment variables or the new variables gets appended to it - :type append_env: bool - :param output_encoding: Output encoding of bash command - :type output_encoding: str - :param skip_exit_code: If task exits with this exit code, leave the task - in ``skipped`` state (default: 99). 
If set to ``None``, any non-zero - exit code will be treated as a failure. - :type skip_exit_code: int - :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. - Otherwise, the query will keep running when the task is killed. - :type cancel_query_on_kill: bool - :param dbt_executable_path: Path to dbt executable can be used with venv - (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) - :type dbt_executable_path: str + Executes a dbt core cli command in a Kubernetes Pod. + """ - template_fields: Sequence[str] = ( - "env", - "vars", - ) + KubernetesPodOperator.template_fields + template_fields: Sequence[str] = DbtBaseOperator.template_fields + KubernetesPodOperator.template_fields def __init__( self, - project_dir: str, - conn_id: str, - base_cmd: str | List[str] = None, - select: str = None, - exclude: str = None, - selector: str = None, - vars: dict = None, - models: str = None, - cache_selected_only: bool = False, - no_version_check: bool = False, - fail_fast: bool = False, - quiet: bool = False, - warn_error: bool = False, - db_name: str = None, - schema: str = None, - env: dict = None, - append_env: bool = False, - output_encoding: str = "utf-8", - skip_exit_code: int = 99, - cancel_query_on_kill: bool = True, - dbt_executable_path: str = "dbt", - container_flags: Dict[str, Any] = {}, **kwargs, ) -> None: - self.project_dir = project_dir - self.conn_id = conn_id - self.base_cmd = base_cmd - self.select = select - self.exclude = exclude - self.selector = selector - self.vars = vars - self.models = models - self.cache_selected_only = cache_selected_only - self.no_version_check = no_version_check - self.fail_fast = fail_fast - self.quiet = quiet - self.warn_error = warn_error - self.db_name = db_name - self.schema = schema - self.env = env - self.append_env = append_env - self.output_encoding = output_encoding - self.skip_exit_code = skip_exit_code - self.cancel_query_on_kill = cancel_query_on_kill - 
self.dbt_executable_path = dbt_executable_path - self.container_flags = container_flags super().__init__(**kwargs) - @cached_property - def subprocess_hook(self): - """Returns hook for running the bash command.""" - return SubprocessHook() - - def get_env(self, context): - """Builds the set of environment variables to be exposed for the bash command.""" - system_env = os.environ.copy() - env = self.env - if env is None: - env = system_env - else: - if self.append_env: - system_env.update(env) - env = system_env - - airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True) - self.log.debug( - "Exporting the following env vars:\n%s", - "\n".join(f"{k}={v}" for k, v in airflow_context_vars.items()), - ) - env.update(airflow_context_vars) - - return env - - def exception_handling(self, result): - if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: - raise AirflowSkipException( - f"dbt command returned exit code {self.skip_exit_code}. Skipping." - ) - elif result.exit_code != 0: - raise AirflowException( - f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." 
- ) - - def add_global_flags(self): - global_flags = [ - "project_dir", - "profiles_dir", - "select", - "exclude", - "selector", - "vars", - "models", - ] - - flags = [] - for global_flag in global_flags: - dbt_name = f"--{global_flag.replace('_', '-')}" - - global_flag_value = self.container_flags.get(global_flag) - if global_flag_value is None: - global_flag_value = self.__getattribute__(global_flag) - - if global_flag_value is not None: - if isinstance(global_flag_value, dict): - # handle dict - yaml_string = yaml.dump(global_flag_value) - flags.append(dbt_name) - flags.append(yaml_string) - else: - flags.append(dbt_name) - flags.append(str(global_flag_value)) - - global_boolean_flags = [ - "no_version_check", - "cache_selected_only", - "fail_fast", - "quiet", - "warn_error", - ] - for global_boolean_flag in global_boolean_flags: - dbt_name = f"--{global_boolean_flag.replace('_', '-')}" - global_boolean_flag_value = self.__getattribute__(global_boolean_flag) - if global_boolean_flag_value is True: - flags.append(dbt_name) - return flags - def build_env_args(self, env: dict): env_vars_dict = {} for env_var in self.env_vars: @@ -223,41 +39,16 @@ def build_env_args(self, env: dict): self.env_vars = convert_env_vars({**env, **env_vars_dict}) - def build_cmd(self, env: dict, cmd_flags: list = None): - profile_vars = {} - if (conn_exists(conn_id=self.conn_id)): - _, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema - ) - - # parse dbt args - dbt_args = [] - - ## start with the dbt executable - self.cmds = [self.dbt_executable_path] - - ## add base cmd - if isinstance(self.base_cmd, str): - dbt_args.append(self.base_cmd) - else: - [dbt_args.append(item) for item in self.base_cmd] - - # add global flags - for item in self.add_global_flags(): - dbt_args.append(item) - - ## add command specific flags - if cmd_flags: - for item in cmd_flags: - dbt_args.append(item) - + def build_cmd_and_args(self, env: dict, 
cmd_flags: list = None): + dbt_cmd, env_vars = self.build_cmd(env=env, cmd_flags=cmd_flags, handle_profile=False) + self.cmds = [dbt_cmd.pop(0)] + ## set env vars - self.build_env_args({**env, **profile_vars}) - - self.arguments = dbt_args - + self.build_env_args(env_vars) + + self.arguments = dbt_cmd -class DbtLSOperator(DbtKubernetesBaseOperator): +class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core ls command. @@ -270,11 +61,11 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "ls" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) -class DbtSeedOperator(DbtKubernetesBaseOperator): +class DbtSeedKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core seed command. @@ -298,11 +89,11 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) return super().execute(context) -class DbtRunOperator(DbtKubernetesBaseOperator): +class DbtRunKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core run command. @@ -316,11 +107,11 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "run" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) -class DbtTestOperator(DbtKubernetesBaseOperator): +class DbtTestKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core test command. 
@@ -333,11 +124,11 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "test" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) -class DbtRunOperationOperator(DbtKubernetesBaseOperator): +class DbtRunOperationKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core run-operation command. @@ -366,11 +157,11 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) return super().execute(context) -class DbtDepsOperator(DbtKubernetesBaseOperator): +class DbtDepsKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core deps command. @@ -385,5 +176,5 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "deps" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) diff --git a/cosmos/providers/dbt/core/operators_local.py b/cosmos/providers/dbt/core/operators_local.py new file mode 100644 index 000000000..e55ff17e6 --- /dev/null +++ b/cosmos/providers/dbt/core/operators_local.py @@ -0,0 +1,217 @@ +from __future__ import annotations + +import os +import signal +from typing import Sequence + +import yaml +from airflow.compat.functools import cached_property +from airflow.exceptions import AirflowException, AirflowSkipException +from airflow.hooks.subprocess import SubprocessHook +from airflow.utils.context import Context + +from cosmos.providers.dbt.core.operators import DbtBaseOperator + +class DbtLocalBaseOperator(DbtBaseOperator): + """ + Executes a dbt core cli command locally. 
+ + """ + + template_fields: Sequence[str] = DbtBaseOperator.template_fields + + def __init__( + self, + **kwargs, + ) -> None: + super().__init__(**kwargs) + + @cached_property + def subprocess_hook(self): + """Returns hook for running the bash command.""" + return SubprocessHook() + + def exception_handling(self, result): + if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: + raise AirflowSkipException( + f"dbt command returned exit code {self.skip_exit_code}. Skipping." + ) + elif result.exit_code != 0: + raise AirflowException( + f"dbt command failed. The command returned a non-zero exit code {result.exit_code}." + ) + + def run_command(self, cmd, env): + # check project_dir + if self.project_dir is not None: + if not os.path.exists(self.project_dir): + raise AirflowException( + f"Can not find the project_dir: {self.project_dir}" + ) + if not os.path.isdir(self.project_dir): + raise AirflowException( + f"The project_dir {self.project_dir} must be a directory" + ) + + # run bash command + result = self.subprocess_hook.run_command( + command=cmd, + env=env, + output_encoding=self.output_encoding, + cwd=self.project_dir, + ) + self.exception_handling(result) + return result + + def build_and_run_cmd(self, env: dict, cmd_flags: list = None): + dbt_cmd, env = self.build_cmd(env=env, cmd_flags=cmd_flags) + result = self.run_command(cmd=dbt_cmd, env=env) + return result + + def execute(self, context: Context): + result = self.build_and_run_cmd(env=self.get_env(context)) + return result.output + + def on_kill(self) -> None: + if self.cancel_query_on_kill: + self.subprocess_hook.log.info("Sending SIGINT signal to process group") + if self.subprocess_hook.sub_process and hasattr( + self.subprocess_hook.sub_process, "pid" + ): + os.killpg( + os.getpgid(self.subprocess_hook.sub_process.pid), signal.SIGINT + ) + else: + self.subprocess_hook.send_sigterm() + + +class DbtLSLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core ls command 
locally. + + """ + + ui_color = "#DBCDF6" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "ls" + + def execute(self, context: Context): + result = self.build_and_run_cmd(env=self.get_env(context)) + return result.output + + +class DbtSeedLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core seed command locally. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + """ + + ui_color = "#F58D7E" + + def __init__(self, full_refresh: bool = False, **kwargs) -> None: + self.full_refresh = full_refresh + super().__init__(**kwargs) + self.base_cmd = "seed" + + def add_cmd_flags(self): + flags = [] + if self.full_refresh is True: + flags.append("--full-refresh") + + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + result = self.build_and_run_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return result.output + + +class DbtRunLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core run command locally. + + """ + + ui_color = "#7352BA" + ui_fgcolor = "#F4F2FC" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "run" + + def execute(self, context: Context): + result = self.build_and_run_cmd(env=self.get_env(context)) + return result.output + + +class DbtTestLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core test command locally. + + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "test" + + def execute(self, context: Context): + result = self.build_and_run_cmd(env=self.get_env(context)) + return result.output + + +class DbtRunOperationLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core run-operation command locally. + + :param macro_name: name of macro to execute + :type macro_name: str + :param args: Supply arguments to the macro. 
This dictionary will be mapped to the keyword arguments defined in the + selected macro. + :type args: dict + """ + + ui_color = "#8194E0" + template_fields: Sequence[str] = "args" + + def __init__(self, macro_name: str, args: dict = None, **kwargs) -> None: + self.macro_name = macro_name + self.args = args + super().__init__(**kwargs) + self.base_cmd = ["run-operation", macro_name] + + def add_cmd_flags(self): + flags = [] + if self.args is not None: + flags.append("--args") + flags.append(yaml.dump(self.args)) + return flags + + def execute(self, context: Context): + cmd_flags = self.add_cmd_flags() + result = self.build_and_run_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + return result.output + + +class DbtDepsLocalOperator(DbtLocalBaseOperator): + """ + Executes a dbt core deps command locally. + + :param vars: Supply variables to the project. This argument overrides variables defined in your dbt_project.yml file + :type vars: dict + """ + + ui_color = "#8194E0" + + def __init__(self, **kwargs) -> None: + super().__init__(**kwargs) + self.base_cmd = "deps" + + def execute(self, context: Context): + result = self.build_and_run_cmd(env=self.get_env(context)) + return result.output diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 34ba0aab9..e7a874ce7 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -19,6 +19,17 @@ logger = logging.getLogger(__name__) +def calculate_operator_class( + execution_mode: str = None, + dbt_class: str = None, +) -> str: + if execution_mode == "kubernetes": + return f"cosmos.providers.dbt.core.operators_kubernetes.{dbt_class}KubernetesOperator" + elif execution_mode == "docker": + return f"cosmos.providers.dbt.core.operators_docker.{dbt_class}DockerOperator" + else: + return f"cosmos.providers.dbt.core.operators_local.{dbt_class}LocalOperator" + def render_project( dbt_project_name: str, dbt_root_path: str = "/usr/local/airflow/dbt", @@ -43,6 +54,7 @@ def 
render_project( :param conn_id: The Airflow connection ID to use in Airflow Datasets :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2]}) + :param execution_mode: Execution mode of the Airflow task (local, docker or kubernetes) """ # first, get the dbt project project = DbtProject( @@ -67,12 +79,6 @@ def render_project( f"{set(select['tags']).intersection(exclude['tags'])}" ) - operator_class = "" - if execution_mode == "docker": - operator_class = "_docker" - elif execution_mode == "kubernetes": - operator_class = "_kubernetes" - if "paths" in select and "paths" in exclude: if set(select["paths"]).intersection(exclude["paths"]): raise AirflowException( @@ -121,7 +127,7 @@ def render_project( # make the run task run_task = Task( id=f"{model_name}_run", - operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtRunOperator", + operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtRun"), arguments=run_args, ) @@ -137,7 +143,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator", + operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtTest"), upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -169,7 +175,7 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class=f"cosmos.providers.dbt.core.operators{operator_class}.DbtTestOperator", + operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtTest"), arguments=task_args, ) entities[test_task.id] = test_task From 5bbaf2fbbfb365c2009e2bd7880e3ec7ffb9054e Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Mon, 13 Feb 2023 10:54:12 +0100 Subject: [PATCH 11/33] Fix needed attribute and add log to show the built command --- 
cosmos/providers/dbt/core/operators.py | 4 ++++ cosmos/providers/dbt/core/operators_docker.py | 1 + cosmos/providers/dbt/core/operators_kubernetes.py | 3 ++- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cosmos/providers/dbt/core/operators.py b/cosmos/providers/dbt/core/operators.py index 1cbce128a..8a5e287cd 100644 --- a/cosmos/providers/dbt/core/operators.py +++ b/cosmos/providers/dbt/core/operators.py @@ -75,6 +75,8 @@ class DbtBaseOperator(BaseOperator): :type dbt_executable_path: str :param dbt_cmd_flags: Flags passed to dbt command override those that are calculated. :type dbt_cmd_flags: dict + :param profiles_dir: Which directory to look in for the profiles.yml file. + :type profiles_dir: str """ template_fields: Sequence[str] = ("env", "vars") @@ -103,6 +105,7 @@ def __init__( cancel_query_on_kill: bool = True, dbt_executable_path: str = "dbt", dbt_cmd_flags: Dict[str, Any] = {}, + profiles_dir: str = None, **kwargs, ) -> None: self.project_dir = project_dir @@ -127,6 +130,7 @@ def __init__( self.cancel_query_on_kill = cancel_query_on_kill self.dbt_executable_path = dbt_executable_path self.dbt_cmd_flags = dbt_cmd_flags + self.profiles_dir = profiles_dir super().__init__(**kwargs) def get_env(self, context): diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index b1d6b11e9..6f42d5fca 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -34,6 +34,7 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): self.environment = {**env_vars, **self.environment} self.command = dbt_cmd + self.log.info(f"Building command: {self.command}") class DbtLSDockerOperator(DbtDockerBaseOperator): """ diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index c058b2f54..6b28814b7 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ 
b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -46,7 +46,8 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): ## set env vars self.build_env_args(env_vars) - self.arguments = dbt_cmd + self.arguments = dbt_cmd + self.log.info(f"Building command: {self.cmds} {self.arguments}") class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): """ From bd11701794ec6764836ea5e6a63d7893396ea427 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Mon, 13 Feb 2023 11:14:51 +0100 Subject: [PATCH 12/33] Add attributes to configure profiles_dir for all the execution mode --- cosmos/providers/dbt/core/operators.py | 4 ++-- cosmos/providers/dbt/core/operators_docker.py | 16 ++++++---------- .../providers/dbt/core/operators_kubernetes.py | 4 ---- .../dbt/core/utils/profiles_generator.py | 11 +++++++---- cosmos/providers/dbt/dag.py | 3 +++ cosmos/providers/dbt/render.py | 5 +++++ cosmos/providers/dbt/task_group.py | 3 +++ 7 files changed, 26 insertions(+), 20 deletions(-) diff --git a/cosmos/providers/dbt/core/operators.py b/cosmos/providers/dbt/core/operators.py index 8a5e287cd..62daf4e0e 100644 --- a/cosmos/providers/dbt/core/operators.py +++ b/cosmos/providers/dbt/core/operators.py @@ -75,7 +75,7 @@ class DbtBaseOperator(BaseOperator): :type dbt_executable_path: str :param dbt_cmd_flags: Flags passed to dbt command override those that are calculated. :type dbt_cmd_flags: dict - :param profiles_dir: Which directory to look in for the profiles.yml file. + :param profiles_dir: Which directory to look in for the profiles.yml file. Default is ~/.dbt/profiles.yml. 
:type profiles_dir: str """ @@ -225,7 +225,7 @@ def build_cmd(self, env: dict, cmd_flags: list = None, handle_profile: bool = Tr ## add profile if handle_profile: - create_default_profiles() + create_default_profiles(self.profiles_dir) profile, profile_vars = map_profile( conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema ) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 6f42d5fca..b7619675a 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -7,10 +7,6 @@ from airflow.utils.context import Context from cosmos.providers.dbt.core.operators import DbtBaseOperator -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, - conn_exists -) class DbtDockerBaseOperator(DockerOperator, DbtBaseOperator): """ @@ -49,7 +45,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "ls" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) @@ -77,7 +73,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) return super().execute(context) @@ -95,7 +91,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "run" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) @@ -112,7 +108,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "test" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) @@ -145,7 +141,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = 
self.add_cmd_flags() - self.build_cmd(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) return super().execute(context) @@ -164,5 +160,5 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "deps" def execute(self, context: Context): - self.build_cmd(env=self.get_env(context)) + self.build_cmd_and_args(env=self.get_env(context)) return super().execute(context) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 6b28814b7..886d958d0 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -12,10 +12,6 @@ from airflow.utils.context import Context from cosmos.providers.dbt.core.operators import DbtBaseOperator -from cosmos.providers.dbt.core.utils.profiles_generator import ( - map_profile, - conn_exists -) class DbtKubernetesBaseOperator(KubernetesPodOperator, DbtBaseOperator): diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index ef61aff2e..958711b04 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -21,7 +21,7 @@ logger = logging.getLogger(__name__) -def create_default_profiles(): +def create_default_profiles(profiles_dir: str = None): # get installed version of astronomer-cosmos try: package = pkg_resources.get_distribution("astronomer-cosmos") @@ -36,9 +36,12 @@ def create_default_profiles(): "databricks_profile": databricks_profile, } - # Define the path to the directory and file - home_dir = os.path.expanduser("~") - file_path = f"{home_dir}/.dbt/profiles.yml" + # Define the path to the directory and file + if profiles_dir != None: + file_path = f"{profiles_dir}/profiles.yml" + else: + home_dir = os.path.expanduser("~") + file_path = f"{home_dir}/.dbt/profiles.yml" # Create the file if it does not exist 
profile_file = Path(file_path) diff --git a/cosmos/providers/dbt/dag.py b/cosmos/providers/dbt/dag.py index 75198c9f9..899367d80 100644 --- a/cosmos/providers/dbt/dag.py +++ b/cosmos/providers/dbt/dag.py @@ -28,6 +28,7 @@ class DbtDag(CosmosDag): Defaults to "after_each" :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param dbt_profiles_dir: Which directory to look in for the profiles.yml file. Default is ~/.dbt/profiles.yml. """ def __init__( @@ -42,6 +43,7 @@ def __init__( select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: str = None, + dbt_profiles_dir: str = None, *args: Any, **kwargs: Any, ) -> None: @@ -63,6 +65,7 @@ def __init__( select=select, exclude=exclude, execution_mode=execution_mode, + dbt_profiles_dir=dbt_profiles_dir, ) # call the airflow DAG constructor diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index e7a874ce7..62a18ffcc 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -41,6 +41,7 @@ def render_project( select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: str = None, + dbt_profiles_dir: str = None, ) -> Group: """ Turn a dbt project into a Group @@ -55,6 +56,7 @@ def render_project( :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2]}) :param execution_mode: Execution mode of the Airflow task (local, docker or kubernetes) + :param dbt_profiles_dir: Which directory to look in for the profiles.yml file. Default is ~/.dbt/profiles.yml. 
""" # first, get the dbt project project = DbtProject( @@ -70,6 +72,9 @@ def render_project( # add project_dir arg to task_args task_args["project_dir"] = project.project_dir + + # add profiles_dir arg to task_args + task_args["profiles_dir"] = dbt_profiles_dir # ensures the same tag isn't in select & exclude if "tags" in select and "tags" in exclude: diff --git a/cosmos/providers/dbt/task_group.py b/cosmos/providers/dbt/task_group.py index e40fce60f..3328fccb2 100644 --- a/cosmos/providers/dbt/task_group.py +++ b/cosmos/providers/dbt/task_group.py @@ -28,6 +28,7 @@ class DbtTaskGroup(CosmosTaskGroup): Defaults to "after_each" :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param dbt_profiles_dir: Which directory to look in for the profiles.yml file. Default is ~/.dbt/profiles.yml. """ def __init__( @@ -42,6 +43,7 @@ def __init__( select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: str = None, + dbt_profiles_dir: str = None, *args: Any, **kwargs: Any, ) -> None: @@ -63,6 +65,7 @@ def __init__( select=select, exclude=exclude, execution_mode=execution_mode, + dbt_profiles_dir=dbt_profiles_dir, ) # call the airflow constructor From 977df805023300143e6d29697a3b3e85e3e041d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 13 Feb 2023 10:17:40 +0000 Subject: [PATCH 13/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/__init__.py | 23 ++++++++-------- cosmos/providers/dbt/core/operators.py | 26 ++++++++++--------- cosmos/providers/dbt/core/operators_docker.py | 16 ++++++++---- .../dbt/core/operators_kubernetes.py | 15 +++++++---- cosmos/providers/dbt/core/operators_local.py | 5 
++-- .../dbt/core/utils/profiles_generator.py | 4 +-- cosmos/providers/dbt/render.py | 15 ++++++++--- 7 files changed, 62 insertions(+), 42 deletions(-) diff --git a/cosmos/providers/dbt/__init__.py b/cosmos/providers/dbt/__init__.py index 92435e3b1..fbf5e3aca 100644 --- a/cosmos/providers/dbt/__init__.py +++ b/cosmos/providers/dbt/__init__.py @@ -2,31 +2,30 @@ dbt support for Airflow. Contains dags, task groups, and operators. """ -# re-export the operators -from .core.operators_local import ( - DbtLSLocalOperator, - DbtRunOperationLocalOperator, - DbtRunLocalOperator, - DbtSeedLocalOperator, - DbtTestLocalOperator, -) - from .core.operators_docker import ( DbtLSDockerOperator, - DbtRunOperationDockerOperator, DbtRunDockerOperator, + DbtRunOperationDockerOperator, DbtSeedDockerOperator, DbtTestDockerOperator, ) - from .core.operators_kubernetes import ( DbtLSKubernetesOperator, - DbtRunOperationKubernetesOperator, DbtRunKubernetesOperator, + DbtRunOperationKubernetesOperator, DbtSeedKubernetesOperator, DbtTestKubernetesOperator, ) +# re-export the operators +from .core.operators_local import ( + DbtLSLocalOperator, + DbtRunLocalOperator, + DbtRunOperationLocalOperator, + DbtSeedLocalOperator, + DbtTestLocalOperator, +) + # re-export user facing utilities from .core.utils.data_aware_scheduling import get_dbt_dataset diff --git a/cosmos/providers/dbt/core/operators.py b/cosmos/providers/dbt/core/operators.py index 62daf4e0e..94df16d68 100644 --- a/cosmos/providers/dbt/core/operators.py +++ b/cosmos/providers/dbt/core/operators.py @@ -1,7 +1,7 @@ from __future__ import annotations import os -from typing import List, Dict, Any, Sequence +from typing import Any, Dict, List, Sequence import yaml from airflow.models.baseoperator import BaseOperator @@ -168,11 +168,11 @@ def add_global_flags(self): flags = [] for global_flag in global_flags: dbt_name = f"--{global_flag.replace('_', '-')}" - + global_flag_value = self.dbt_cmd_flags.get(global_flag) if global_flag_value is 
None: global_flag_value = self.__getattribute__(global_flag) - + if global_flag_value is not None: if isinstance(global_flag_value, dict): # handle dict @@ -192,11 +192,11 @@ def add_global_flags(self): ] for global_boolean_flag in global_boolean_flags: dbt_name = f"--{global_boolean_flag.replace('_', '-')}" - + global_boolean_flag_value = self.dbt_cmd_flags.get(global_boolean_flag) if global_boolean_flag_value is None: global_boolean_flag_value = self.__getattribute__(global_boolean_flag) - + if global_boolean_flag_value is True: flags.append(dbt_name) return flags @@ -224,16 +224,18 @@ def build_cmd(self, env: dict, cmd_flags: list = None, handle_profile: bool = Tr dbt_cmd.append(item) ## add profile - if handle_profile: - create_default_profiles(self.profiles_dir) + if handle_profile: + create_default_profiles(self.profiles_dir) profile, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + conn_id=self.conn_id, + db_override=self.db_name, + schema_override=self.schema, ) - + dbt_cmd.append("--profile") - dbt_cmd.append(profile) - + dbt_cmd.append(profile) + ## set env vars env = {**env, **profile_vars} - return dbt_cmd, env \ No newline at end of file + return dbt_cmd, env diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 8bd544eb1..67baa5f61 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -8,29 +8,35 @@ from cosmos.providers.dbt.core.operators import DbtBaseOperator + class DbtDockerBaseOperator(DockerOperator, DbtBaseOperator): """ Executes a dbt core cli command in a Docker container. 
""" - template_fields: Sequence[str] = DbtBaseOperator.template_fields + DockerOperator.template_fields + template_fields: Sequence[str] = ( + DbtBaseOperator.template_fields + DockerOperator.template_fields + ) def __init__( self, **kwargs, ) -> None: super().__init__(**kwargs) - + def build_cmd_and_args(self, env: dict, cmd_flags: list = None): - dbt_cmd, env_vars = self.build_cmd(env=env, cmd_flags=cmd_flags, handle_profile=False) - + dbt_cmd, env_vars = self.build_cmd( + env=env, cmd_flags=cmd_flags, handle_profile=False + ) + ## set env vars self.environment = {**env_vars, **self.environment} self.command = dbt_cmd self.log.info(f"Building command: {self.command}") - + + class DbtLSDockerOperator(DbtDockerBaseOperator): """ Executes a dbt core ls command. diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 886d958d0..7306571d7 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -20,7 +20,9 @@ class DbtKubernetesBaseOperator(KubernetesPodOperator, DbtBaseOperator): """ - template_fields: Sequence[str] = DbtBaseOperator.template_fields + KubernetesPodOperator.template_fields + template_fields: Sequence[str] = ( + DbtBaseOperator.template_fields + KubernetesPodOperator.template_fields + ) def __init__( self, @@ -36,15 +38,18 @@ def build_env_args(self, env: dict): self.env_vars = convert_env_vars({**env, **env_vars_dict}) def build_cmd_and_args(self, env: dict, cmd_flags: list = None): - dbt_cmd, env_vars = self.build_cmd(env=env, cmd_flags=cmd_flags, handle_profile=False) + dbt_cmd, env_vars = self.build_cmd( + env=env, cmd_flags=cmd_flags, handle_profile=False + ) self.cmds = [dbt_cmd.pop(0)] - + ## set env vars self.build_env_args(env_vars) - - self.arguments = dbt_cmd + + self.arguments = dbt_cmd self.log.info(f"Building command: {self.cmds} {self.arguments}") + class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): """ 
Executes a dbt core ls command. diff --git a/cosmos/providers/dbt/core/operators_local.py b/cosmos/providers/dbt/core/operators_local.py index e55ff17e6..bb7029d8b 100644 --- a/cosmos/providers/dbt/core/operators_local.py +++ b/cosmos/providers/dbt/core/operators_local.py @@ -12,12 +12,13 @@ from cosmos.providers.dbt.core.operators import DbtBaseOperator + class DbtLocalBaseOperator(DbtBaseOperator): """ Executes a dbt core cli command locally. """ - + template_fields: Sequence[str] = DbtBaseOperator.template_fields def __init__( @@ -62,7 +63,7 @@ def run_command(self, cmd, env): ) self.exception_handling(result) return result - + def build_and_run_cmd(self, env: dict, cmd_flags: list = None): dbt_cmd, env = self.build_cmd(env=env, cmd_flags=cmd_flags) result = self.run_command(cmd=dbt_cmd, env=env) diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index f80a376f2..256648098 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -36,8 +36,8 @@ def create_default_profiles(profiles_dir: str = None): "databricks_profile": databricks_profile, } - # Define the path to the directory and file - if profiles_dir != None: + # Define the path to the directory and file + if profiles_dir is not None: file_path = f"{profiles_dir}/profiles.yml" else: home_dir = os.path.expanduser("~") diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 62a18ffcc..a8ad79b2b 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -30,6 +30,7 @@ def calculate_operator_class( else: return f"cosmos.providers.dbt.core.operators_local.{dbt_class}LocalOperator" + def render_project( dbt_project_name: str, dbt_root_path: str = "/usr/local/airflow/dbt", @@ -72,7 +73,7 @@ def render_project( # add project_dir arg to task_args task_args["project_dir"] = project.project_dir - + # add profiles_dir arg to 
task_args task_args["profiles_dir"] = dbt_profiles_dir @@ -132,7 +133,9 @@ def render_project( # make the run task run_task = Task( id=f"{model_name}_run", - operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtRun"), + operator_class=calculate_operator_class( + execution_mode=execution_mode, dbt_class="DbtRun" + ), arguments=run_args, ) @@ -148,7 +151,9 @@ def render_project( test_task = Task( id=f"{model_name}_test", - operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtTest"), + operator_class=calculate_operator_class( + execution_mode=execution_mode, dbt_class="DbtTest" + ), upstream_entity_ids=[run_task.id], arguments=test_args, ) @@ -180,7 +185,9 @@ def render_project( # make a test task test_task = Task( id=f"{dbt_project_name}_test", - operator_class=calculate_operator_class(execution_mode=execution_mode, dbt_class="DbtTest"), + operator_class=calculate_operator_class( + execution_mode=execution_mode, dbt_class="DbtTest" + ), arguments=task_args, ) entities[test_task.id] = test_task From ede6e92109e0c5aaf8b46d7116e4ede6102d48d0 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 17 Feb 2023 17:10:19 +0100 Subject: [PATCH 14/33] Remove k8s cmds set to not override entrypoint command --- cosmos/providers/dbt/core/operators_kubernetes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 7306571d7..55436bebe 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -41,7 +41,6 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): dbt_cmd, env_vars = self.build_cmd( env=env, cmd_flags=cmd_flags, handle_profile=False ) - self.cmds = [dbt_cmd.pop(0)] ## set env vars self.build_env_args(env_vars) From 62f3fafa0362d65af16b63cc7f21a58622d627ae Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 17 Feb 2023 
17:12:27 +0100 Subject: [PATCH 15/33] patch logging --- cosmos/providers/dbt/core/operators_kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 55436bebe..90d47ed1b 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -46,7 +46,7 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): self.build_env_args(env_vars) self.arguments = dbt_cmd - self.log.info(f"Building command: {self.cmds} {self.arguments}") + self.log.info(f"Building command: {self.arguments}") class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): From 771a2d211d8ffa0f84464a96ff9331750f6cd246 Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 17 Feb 2023 18:38:10 +0100 Subject: [PATCH 16/33] Remove env local variables fetching for container mode --- cosmos/providers/dbt/core/operators_docker.py | 12 ++++++------ .../providers/dbt/core/operators_kubernetes.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 67baa5f61..1383b6f44 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -50,7 +50,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "ls" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -78,7 +78,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(cmd_flags=cmd_flags) return super().execute(context) @@ -96,7 +96,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "run" def execute(self, context: Context): - 
self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -113,7 +113,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "test" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -146,7 +146,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(cmd_flags=cmd_flags) return super().execute(context) @@ -165,5 +165,5 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "deps" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 90d47ed1b..1fe783be8 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -45,8 +45,8 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): ## set env vars self.build_env_args(env_vars) - self.arguments = dbt_cmd - self.log.info(f"Building command: {self.arguments}") + self.arguments = ["sleep", "4000"] + self.log.info(f"Building command: {dbt_cmd}") class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): @@ -62,7 +62,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "ls" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -90,7 +90,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(cmd_flags=cmd_flags) return super().execute(context) @@ -108,7 +108,7 @@ def __init__(self, **kwargs) 
-> None: self.base_cmd = "run" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -125,7 +125,7 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "test" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) @@ -158,7 +158,7 @@ def add_cmd_flags(self): def execute(self, context: Context): cmd_flags = self.add_cmd_flags() - self.build_cmd_and_args(env=self.get_env(context), cmd_flags=cmd_flags) + self.build_cmd_and_args(cmd_flags=cmd_flags) return super().execute(context) @@ -177,5 +177,5 @@ def __init__(self, **kwargs) -> None: self.base_cmd = "deps" def execute(self, context: Context): - self.build_cmd_and_args(env=self.get_env(context)) + self.build_cmd_and_args() return super().execute(context) From 29cec9561bff5256c2fed36f0d74fc478f273fac Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 17 Feb 2023 18:39:49 +0100 Subject: [PATCH 17/33] Patch K8S args --- cosmos/providers/dbt/core/operators_kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 1fe783be8..1233d0e79 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -45,8 +45,8 @@ def build_cmd_and_args(self, env: dict, cmd_flags: list = None): ## set env vars self.build_env_args(env_vars) - self.arguments = ["sleep", "4000"] - self.log.info(f"Building command: {dbt_cmd}") + self.arguments = dbt_cmd + self.log.info(f"Building command: {self.arguments}") class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): From 58a6667c04ef4f6b294a6d8e669fa2c1b828283b Mon Sep 17 00:00:00 2001 From: Julien Guitton Date: Fri, 17 Feb 2023 18:43:56 +0100 Subject: [PATCH 18/33] Fix missing default value --- 
cosmos/providers/dbt/core/operators_docker.py | 2 +- cosmos/providers/dbt/core/operators_kubernetes.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cosmos/providers/dbt/core/operators_docker.py b/cosmos/providers/dbt/core/operators_docker.py index 1383b6f44..3742ade5c 100644 --- a/cosmos/providers/dbt/core/operators_docker.py +++ b/cosmos/providers/dbt/core/operators_docker.py @@ -25,7 +25,7 @@ def __init__( ) -> None: super().__init__(**kwargs) - def build_cmd_and_args(self, env: dict, cmd_flags: list = None): + def build_cmd_and_args(self, env: dict = {}, cmd_flags: list = None): dbt_cmd, env_vars = self.build_cmd( env=env, cmd_flags=cmd_flags, handle_profile=False ) diff --git a/cosmos/providers/dbt/core/operators_kubernetes.py b/cosmos/providers/dbt/core/operators_kubernetes.py index 1233d0e79..45290f14c 100644 --- a/cosmos/providers/dbt/core/operators_kubernetes.py +++ b/cosmos/providers/dbt/core/operators_kubernetes.py @@ -37,7 +37,7 @@ def build_env_args(self, env: dict): self.env_vars = convert_env_vars({**env, **env_vars_dict}) - def build_cmd_and_args(self, env: dict, cmd_flags: list = None): + def build_cmd_and_args(self, env: dict = {}, cmd_flags: list = None): dbt_cmd, env_vars = self.build_cmd( env=env, cmd_flags=cmd_flags, handle_profile=False ) From 1d8d5a5cc1d584d672c98b9631f82639ee9e8381 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 20 Feb 2023 15:26:00 +0000 Subject: [PATCH 19/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/core/operators.py | 6 +++++- cosmos/providers/dbt/core/utils/profiles_generator.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cosmos/providers/dbt/core/operators.py b/cosmos/providers/dbt/core/operators.py index 3acab6280..bfe8f9c56 
100644 --- a/cosmos/providers/dbt/core/operators.py +++ b/cosmos/providers/dbt/core/operators.py @@ -226,7 +226,11 @@ def build_cmd(self, env: dict, cmd_flags: list = None, handle_profile: bool = Tr ## add profile if handle_profile: - create_default_profiles(DBT_PROFILE_PATH if self.profiles_dir is None else Path(self.profiles_dir).joinpath("profiles.yml")) + create_default_profiles( + DBT_PROFILE_PATH + if self.profiles_dir is None + else Path(self.profiles_dir).joinpath("profiles.yml") + ) profile, profile_vars = map_profile( conn_id=self.conn_id, db_override=self.db_name, diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index 7cf802fa9..e4972d87c 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -67,6 +67,7 @@ def map_profile( logging.error(f"This connection type is currently not supported {connection_type}.") sys.exit(1) + def conn_exists(conn_id: str) -> bool: try: BaseHook().get_connection(conn_id) From 37705e689f3e6f2c064ee95a076f1c98514f90a4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 18 Mar 2023 20:51:25 +0000 Subject: [PATCH 20/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/__init__.py | 23 +++++++++---------- cosmos/providers/dbt/core/operators/base.py | 18 ++++++++------- cosmos/providers/dbt/core/operators/docker.py | 13 ++++------- .../dbt/core/operators/kubernetes.py | 19 +++++++-------- cosmos/providers/dbt/core/operators/local.py | 23 +++++++------------ .../dbt/core/utils/profiles_generator.py | 3 ++- cosmos/providers/dbt/dag.py | 5 ++-- cosmos/providers/dbt/render.py | 7 +++--- cosmos/providers/dbt/task_group.py | 3 +-- docs/dbt/configuration.rst | 2 
+- 10 files changed, 50 insertions(+), 66 deletions(-) diff --git a/cosmos/providers/dbt/__init__.py b/cosmos/providers/dbt/__init__.py index 5ad77b8f1..ee8ffd3db 100644 --- a/cosmos/providers/dbt/__init__.py +++ b/cosmos/providers/dbt/__init__.py @@ -2,31 +2,30 @@ dbt support for Airflow. Contains dags, task groups, and operators. """ -# re-export the operators -from .core.operators.local import ( - DbtLSLocalOperator, - DbtRunOperationLocalOperator, - DbtRunLocalOperator, - DbtSeedLocalOperator, - DbtTestLocalOperator, -) - from .core.operators.docker import ( DbtLSDockerOperator, - DbtRunOperationDockerOperator, DbtRunDockerOperator, + DbtRunOperationDockerOperator, DbtSeedDockerOperator, DbtTestDockerOperator, ) - from .core.operators.kubernetes import ( DbtLSKubernetesOperator, - DbtRunOperationKubernetesOperator, DbtRunKubernetesOperator, + DbtRunOperationKubernetesOperator, DbtSeedKubernetesOperator, DbtTestKubernetesOperator, ) +# re-export the operators +from .core.operators.local import ( + DbtLSLocalOperator, + DbtRunLocalOperator, + DbtRunOperationLocalOperator, + DbtSeedLocalOperator, + DbtTestLocalOperator, +) + # re-export user facing utilities from .core.utils.data_aware_scheduling import get_dbt_dataset diff --git a/cosmos/providers/dbt/core/operators/base.py b/cosmos/providers/dbt/core/operators/base.py index 0934042de..8620d59cc 100644 --- a/cosmos/providers/dbt/core/operators/base.py +++ b/cosmos/providers/dbt/core/operators/base.py @@ -3,7 +3,7 @@ import logging import os import shutil -from typing import Sequence, Dict, Any, Tuple +from typing import Any, Dict, Sequence, Tuple import yaml from airflow.models.baseoperator import BaseOperator @@ -11,7 +11,6 @@ from airflow.utils.operator_helpers import context_to_airflow_vars from cosmos.providers.dbt.constants import DBT_PROFILE_PATH - from cosmos.providers.dbt.core.utils.profiles_generator import ( create_default_profiles, map_profile, @@ -156,8 +155,7 @@ def get_env(self, context: Context, 
profile_vars: dict[str, str]) -> dict[str, s elif self.append_env: system_env.update(env) env = system_env - airflow_context_vars = context_to_airflow_vars( - context, in_env_var_format=True) + airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True) self.log.debug( "Exporting the following env vars:\n%s", "\n".join(f"{k}={v}" for k, v in airflow_context_vars.items()), @@ -195,11 +193,13 @@ def add_global_flags(self) -> list[str]: flags.extend([dbt_name, yaml_string]) else: flags.extend([dbt_name, str(global_flag_value)]) - for global_boolean_flag in self.global_boolean_flags: + for global_boolean_flag in self.global_boolean_flags: global_boolean_flag_value = self.dbt_cmd_flags.get(global_boolean_flag) if global_boolean_flag_value is None: try: - global_boolean_flag_value = self.__getattribute__(global_boolean_flag) + global_boolean_flag_value = self.__getattribute__( + global_boolean_flag + ) except AttributeError: pass if global_boolean_flag_value: @@ -210,7 +210,7 @@ def build_cmd( self, context: Context, cmd_flags: list[str] | None = None, - handle_profile: bool = True + handle_profile: bool = True, ) -> Tuple[list[str], dict]: dbt_cmd = [self.dbt_executable_path] if isinstance(self.base_cmd, str): @@ -225,7 +225,9 @@ def build_cmd( if handle_profile: create_default_profiles(DBT_PROFILE_PATH) profile, profile_vars = map_profile( - conn_id=self.conn_id, db_override=self.db_name, schema_override=self.schema + conn_id=self.conn_id, + db_override=self.db_name, + schema_override=self.schema, ) dbt_cmd.extend(["--profile", profile]) # set env vars diff --git a/cosmos/providers/dbt/core/operators/docker.py b/cosmos/providers/dbt/core/operators/docker.py index 952221be7..c312001b2 100644 --- a/cosmos/providers/dbt/core/operators/docker.py +++ b/cosmos/providers/dbt/core/operators/docker.py @@ -4,9 +4,10 @@ from typing import Sequence import yaml +from airflow.providers.docker.operators.docker import DockerOperator from airflow.utils.context 
import Context + from cosmos.providers.dbt.core.operators.base import DbtBaseOperator -from airflow.providers.docker.operators.docker import DockerOperator logger = logging.getLogger(__name__) @@ -29,15 +30,9 @@ def __init__( ) -> None: super().__init__(**kwargs) - def build_and_run_cmd( - self, - context: Context, - cmd_flags: list[str] | None = None): - + def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None): dbt_cmd, env_vars = self.build_cmd( - context=context, - cmd_flags=cmd_flags, - handle_profile=False + context=context, cmd_flags=cmd_flags, handle_profile=False ) # set env vars diff --git a/cosmos/providers/dbt/core/operators/kubernetes.py b/cosmos/providers/dbt/core/operators/kubernetes.py index 71dcaffd0..1daa3023d 100644 --- a/cosmos/providers/dbt/core/operators/kubernetes.py +++ b/cosmos/providers/dbt/core/operators/kubernetes.py @@ -4,15 +4,16 @@ from typing import Sequence import yaml -from airflow.utils.context import Context -from cosmos.providers.dbt.core.operators.base import DbtBaseOperator -from kubernetes.client import models as k8s from airflow.providers.cncf.kubernetes.backcompat.backwards_compat_converters import ( convert_env_vars, ) from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import ( KubernetesPodOperator, ) +from airflow.utils.context import Context +from kubernetes.client import models as k8s + +from cosmos.providers.dbt.core.operators.base import DbtBaseOperator logger = logging.getLogger(__name__) @@ -42,15 +43,9 @@ def build_env_args(self, env: dict) -> list[k8s.V1EnvVar]: self.env_vars = convert_env_vars({**env, **env_vars_dict}) - def build_and_run_cmd( - self, - context: Context, - cmd_flags: list[str] | None = None): - + def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None): dbt_cmd, env_vars = self.build_cmd( - context=context, - cmd_flags=cmd_flags, - handle_profile=False + context=context, cmd_flags=cmd_flags, handle_profile=False ) # set env vars @@ 
-60,6 +55,7 @@ def build_and_run_cmd( self.log.info(f"Running command: {self.arguments}") return super().execute(context) + class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core ls command. @@ -74,6 +70,7 @@ def __init__(self, **kwargs) -> None: def execute(self, context: Context): return self.build_and_run_cmd(context=context) + class DbtSeedKubernetesOperator(DbtKubernetesBaseOperator): """ Executes a dbt core seed command. diff --git a/cosmos/providers/dbt/core/operators/local.py b/cosmos/providers/dbt/core/operators/local.py index 096931a44..f4c54c314 100644 --- a/cosmos/providers/dbt/core/operators/local.py +++ b/cosmos/providers/dbt/core/operators/local.py @@ -14,10 +14,9 @@ from airflow.exceptions import AirflowException, AirflowSkipException from airflow.hooks.subprocess import SubprocessHook, SubprocessResult from airflow.utils.context import Context -from cosmos.providers.dbt.core.operators.base import DbtBaseOperator from cosmos.providers.dbt.constants import DBT_PROFILE_PATH - +from cosmos.providers.dbt.core.operators.base import DbtBaseOperator from cosmos.providers.dbt.core.utils.file_syncing import ( exclude, has_differences, @@ -41,12 +40,11 @@ def __init__( ) -> None: super().__init__(**kwargs) - @cached_property def subprocess_hook(self): """Returns hook for running the bash command.""" return SubprocessHook() - + def exception_handling(self, result: SubprocessResult): if self.skip_exit_code is not None and result.exit_code == self.skip_exit_code: raise AirflowSkipException( @@ -80,8 +78,7 @@ def run_command( if os.path.exists(target_dir): # if the directory doesn't exist or if there are changes -- keep changes as true comparison = dircmp( - self.project_dir, target_dir, ignore=[ - "logs", "target", ".lock"] + self.project_dir, target_dir, ignore=["logs", "target", ".lock"] ) # compares tmp and project dir changes = has_differences(comparison) # check for changes @@ -99,8 +96,7 @@ def run_command( with open(lock_file, 
"w") as lock_file: try: # Lock acquired, the lock file is available - fcntl.flock(lock_file, fcntl.LOCK_SH | - fcntl.LOCK_NB) + fcntl.flock(lock_file, fcntl.LOCK_SH | fcntl.LOCK_NB) break except OSError: # Lock is held by another process, wait and try again @@ -133,9 +129,8 @@ def run_command( return result def build_and_run_cmd( - self, - context: Context, - cmd_flags: list[str] | None = None) -> SubprocessResult: + self, context: Context, cmd_flags: list[str] | None = None + ) -> SubprocessResult: dbt_cmd, env = self.build_cmd(context=context, cmd_flags=cmd_flags) return self.run_command(cmd=dbt_cmd, env=env) @@ -145,14 +140,12 @@ def execute(self, context: Context) -> str: def on_kill(self) -> None: if self.cancel_query_on_kill: - self.subprocess_hook.log.info( - "Sending SIGINT signal to process group") + self.subprocess_hook.log.info("Sending SIGINT signal to process group") if self.subprocess_hook.sub_process and hasattr( self.subprocess_hook.sub_process, "pid" ): os.killpg( - os.getpgid( - self.subprocess_hook.sub_process.pid), signal.SIGINT + os.getpgid(self.subprocess_hook.sub_process.pid), signal.SIGINT ) else: self.subprocess_hook.send_sigterm() diff --git a/cosmos/providers/dbt/core/utils/profiles_generator.py b/cosmos/providers/dbt/core/utils/profiles_generator.py index 975ed496f..e4972d87c 100644 --- a/cosmos/providers/dbt/core/utils/profiles_generator.py +++ b/cosmos/providers/dbt/core/utils/profiles_generator.py @@ -67,9 +67,10 @@ def map_profile( logging.error(f"This connection type is currently not supported {connection_type}.") sys.exit(1) + def conn_exists(conn_id: str) -> bool: try: BaseHook().get_connection(conn_id) return True except AirflowNotFoundException: - return False \ No newline at end of file + return False diff --git a/cosmos/providers/dbt/dag.py b/cosmos/providers/dbt/dag.py index 88f5228c5..c2eaea1be 100644 --- a/cosmos/providers/dbt/dag.py +++ b/cosmos/providers/dbt/dag.py @@ -40,8 +40,7 @@ def __init__( emit_datasets: bool = 
True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", - "after_all"] = "after_each", + test_behavior: Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", @@ -65,7 +64,7 @@ def __init__( conn_id=conn_id, select=select, exclude=exclude, - execution_mode=execution_mode + execution_mode=execution_mode, ) # call the airflow DAG constructor diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index ffb4a72d9..1d6773ed9 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -25,6 +25,7 @@ def calculate_operator_class( ) -> str: return f"cosmos.providers.dbt.core.operators.{execution_mode}.{dbt_class}{execution_mode.capitalize()}Operator" + def render_project( dbt_project_name: str, dbt_root_path: str = "/usr/local/airflow/dbt", @@ -144,8 +145,7 @@ def render_project( test_task = Task( id=f"{model_name}_test", operator_class=calculate_operator_class( - execution_mode=execution_mode, - dbt_class="DbtTest" + execution_mode=execution_mode, dbt_class="DbtTest" ), upstream_entity_ids=[run_task.id], arguments=test_args, @@ -179,8 +179,7 @@ def render_project( test_task = Task( id=f"{dbt_project_name}_test", operator_class=calculate_operator_class( - execution_mode=execution_mode, - dbt_class="DbtTest" + execution_mode=execution_mode, dbt_class="DbtTest" ), arguments=task_args, ) diff --git a/cosmos/providers/dbt/task_group.py b/cosmos/providers/dbt/task_group.py index c7c1f70bb..2bc0438ac 100644 --- a/cosmos/providers/dbt/task_group.py +++ b/cosmos/providers/dbt/task_group.py @@ -40,8 +40,7 @@ def __init__( emit_datasets: bool = True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", - "after_all"] = "after_each", + test_behavior: 
Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", diff --git a/docs/dbt/configuration.rst b/docs/dbt/configuration.rst index ad8dbe485..09bd7bc60 100644 --- a/docs/dbt/configuration.rst +++ b/docs/dbt/configuration.rst @@ -64,4 +64,4 @@ Examples: jaffle_shop = DbtDag( # ... select={"paths": ['analytics/tables']}, - ) \ No newline at end of file + ) From 5128fd58abc0ee1822b3b9b4eef2b0b0eb0e5eca Mon Sep 17 00:00:00 2001 From: Juldrixx Date: Sat, 18 Mar 2023 15:55:16 -0500 Subject: [PATCH 21/33] fix lint --- cosmos/providers/dbt/dag.py | 6 ++++-- cosmos/providers/dbt/render.py | 3 ++- cosmos/providers/dbt/task_group.py | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cosmos/providers/dbt/dag.py b/cosmos/providers/dbt/dag.py index c2eaea1be..8492f0040 100644 --- a/cosmos/providers/dbt/dag.py +++ b/cosmos/providers/dbt/dag.py @@ -28,7 +28,8 @@ class DbtDag(CosmosDag): Defaults to "after_each" :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) - :param execution_mode: The execution mode in which the dbt project should be run. Options are "local", "docker", and "kubernetes". + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". 
Defaults to "local" """ @@ -40,7 +41,8 @@ def __init__( emit_datasets: bool = True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", "after_all"] = "after_each", + test_behavior: Literal["none", "after_each", + "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", diff --git a/cosmos/providers/dbt/render.py b/cosmos/providers/dbt/render.py index 1d6773ed9..69522ec60 100644 --- a/cosmos/providers/dbt/render.py +++ b/cosmos/providers/dbt/render.py @@ -50,7 +50,8 @@ def render_project( :param conn_id: The Airflow connection ID to use in Airflow Datasets :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2]}}) - :param execution_mode: The execution mode in which the dbt project should be run. Options are "local", "docker", and "kubernetes". + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". Defaults to "local" """ # first, get the dbt project diff --git a/cosmos/providers/dbt/task_group.py b/cosmos/providers/dbt/task_group.py index 2bc0438ac..6102d3b65 100644 --- a/cosmos/providers/dbt/task_group.py +++ b/cosmos/providers/dbt/task_group.py @@ -28,7 +28,8 @@ class DbtTaskGroup(CosmosTaskGroup): Defaults to "after_each" :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) - :param execution_mode: The execution mode in which the dbt project should be run. Options are "local", "docker", and "kubernetes". + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". 
Defaults to "local" """ @@ -40,7 +41,8 @@ def __init__( emit_datasets: bool = True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", "after_all"] = "after_each", + test_behavior: Literal["none", "after_each", + "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", From 8f5cf974dfffd3ed87106c66d452421eb32edf2d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 18 Mar 2023 20:55:26 +0000 Subject: [PATCH 22/33] =?UTF-8?q?=F0=9F=8E=A8=20[pre-commit.ci]=20Auto=20f?= =?UTF-8?q?ormat=20from=20pre-commit.com=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cosmos/providers/dbt/dag.py | 3 +-- cosmos/providers/dbt/task_group.py | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cosmos/providers/dbt/dag.py b/cosmos/providers/dbt/dag.py index 8492f0040..e3473b76b 100644 --- a/cosmos/providers/dbt/dag.py +++ b/cosmos/providers/dbt/dag.py @@ -41,8 +41,7 @@ def __init__( emit_datasets: bool = True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", - "after_all"] = "after_each", + test_behavior: Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", diff --git a/cosmos/providers/dbt/task_group.py b/cosmos/providers/dbt/task_group.py index 6102d3b65..174277feb 100644 --- a/cosmos/providers/dbt/task_group.py +++ b/cosmos/providers/dbt/task_group.py @@ -41,8 +41,7 @@ def __init__( emit_datasets: bool = True, dbt_root_path: str = "/usr/local/airflow/dbt", dbt_models_dir: str = "models", - test_behavior: Literal["none", "after_each", - 
"after_all"] = "after_each", + test_behavior: Literal["none", "after_each", "after_all"] = "after_each", select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal["local", "docker", "kubernetes"] = "local", From cd2c55dabe1e9e3e69bcab7f283222069c9e9ead Mon Sep 17 00:00:00 2001 From: Juldrixx Date: Sat, 18 Mar 2023 16:00:29 -0500 Subject: [PATCH 23/33] patch test --- tests/dbt/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/dbt/test_operators.py b/tests/dbt/test_operators.py index bf3d500bd..ac2042431 100644 --- a/tests/dbt/test_operators.py +++ b/tests/dbt/test_operators.py @@ -63,7 +63,7 @@ def test_dbt_base_operator_exception_handling( ) -@patch("cosmos.providers.dbt.core.operators.context_to_airflow_vars") +@patch("cosmos.providers.dbt.core.operators.base.context_to_airflow_vars") def test_dbt_base_operator_get_env(p_context_to_airflow_vars: MagicMock) -> None: """ If an end user passes in a From 6bd2cfd0e5b0b05b7a3e198383b92288c4f302dc Mon Sep 17 00:00:00 2001 From: dimerman Date: Sat, 18 Mar 2023 17:41:35 -0700 Subject: [PATCH 24/33] Add tests for dbt docker --- cosmos/providers/dbt/core/operators/docker.py | 12 +- tests/dbt/operators/__init__.py | 0 tests/dbt/operators/test_docker.py | 121 ++++++++++++++++++ .../test_local.py} | 0 4 files changed, 128 insertions(+), 5 deletions(-) create mode 100644 tests/dbt/operators/__init__.py create mode 100644 tests/dbt/operators/test_docker.py rename tests/dbt/{test_operators.py => operators/test_local.py} (100%) diff --git a/cosmos/providers/dbt/core/operators/docker.py b/cosmos/providers/dbt/core/operators/docker.py index c312001b2..2cd768c51 100644 --- a/cosmos/providers/dbt/core/operators/docker.py +++ b/cosmos/providers/dbt/core/operators/docker.py @@ -26,21 +26,23 @@ class DbtDockerBaseOperator(DockerOperator, DbtBaseOperator): def __init__( self, + image: str, # Make image a required argument since it's required by DockerOperator 
**kwargs, ) -> None: - super().__init__(**kwargs) + super().__init__(image=image, **kwargs) def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None): + self.build_command(cmd_flags, context) + self.log.info(f"Running command: {self.command}") + return super().execute(context) + + def build_command(self, cmd_flags, context): dbt_cmd, env_vars = self.build_cmd( context=context, cmd_flags=cmd_flags, handle_profile=False ) - # set env vars self.environment = {**env_vars, **self.environment} - self.command = dbt_cmd - self.log.info(f"Running command: {self.command}") - return super().execute(context) class DbtLSDockerOperator(DbtDockerBaseOperator): diff --git a/tests/dbt/operators/__init__.py b/tests/dbt/operators/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/dbt/operators/test_docker.py b/tests/dbt/operators/test_docker.py new file mode 100644 index 000000000..329227ff7 --- /dev/null +++ b/tests/dbt/operators/test_docker.py @@ -0,0 +1,121 @@ +from pathlib import Path +from unittest.mock import MagicMock, patch + +from airflow.utils.context import Context +from pendulum import datetime + +from cosmos.providers.dbt.core.operators.docker import ( + DbtDepsDockerOperator, + DbtDockerBaseOperator, + DbtLSDockerOperator, + DbtRunDockerOperator, + DbtSeedDockerOperator, + DbtTestDockerOperator, +) + + +def test_dbt_docker_operator_add_global_flags() -> None: + dbt_base_operator = DbtDockerBaseOperator( + conn_id="my_airflow_connection", + task_id="my-task", + image="my_image", + project_dir="my/dir", + vars={ + "start_time": "{{ data_interval_start.strftime('%Y%m%d%H%M%S') }}", + "end_time": "{{ data_interval_end.strftime('%Y%m%d%H%M%S') }}", + }, + no_version_check=True, + ) + assert dbt_base_operator.add_global_flags() == [ + "--project-dir", + "my/dir", + "--vars", + "end_time: '{{ data_interval_end.strftime(''%Y%m%d%H%M%S'') }}'\n" + "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", + 
"--no-version-check", + ] + + +@patch("cosmos.providers.dbt.core.operators.base.context_to_airflow_vars") +def test_dbt_docker_operator_get_env(p_context_to_airflow_vars: MagicMock) -> None: + """ + If an end user passes in a + """ + dbt_base_operator = DbtDockerBaseOperator( + conn_id="my_airflow_connection", + task_id="my-task", + image="my_image", + project_dir="my/dir", + ) + dbt_base_operator.env = { + "start_date": "20220101", + "end_date": "20220102", + "some_path": Path(__file__), + "retries": 3, + ("tuple", "key"): "some_value", + } + p_context_to_airflow_vars.return_value = {"START_DATE": "2023-02-15 12:30:00"} + env = dbt_base_operator.get_env( + Context(execution_date=datetime(2023, 2, 15, 12, 30)), + profile_vars={ + "SNOWFLAKE_USER": "my_user_id", + "SNOWFLAKE_PASSWORD": "supersecure123", + "SNOWFLAKE_ACCOUNT": "my_account", + "SNOWFLAKE_ROLE": None, + "SNOWFLAKE_DATABASE": "my_database", + "SNOWFLAKE_WAREHOUSE": None, + "SNOWFLAKE_SCHEMA": "jaffle_shop", + }, + ) + expected_env = { + "start_date": "20220101", + "end_date": "20220102", + "some_path": Path(__file__), + "START_DATE": "2023-02-15 12:30:00", + "SNOWFLAKE_USER": "my_user_id", + "SNOWFLAKE_PASSWORD": "supersecure123", + "SNOWFLAKE_ACCOUNT": "my_account", + "SNOWFLAKE_DATABASE": "my_database", + "SNOWFLAKE_SCHEMA": "jaffle_shop", + } + assert env == expected_env + + +base_kwargs = { + "conn_id": "my_airflow_connection", + "task_id": "my-task", + "image": "my_image", + "project_dir": "my/dir", + "vars": { + "start_time": "{{ data_interval_start.strftime('%Y%m%d%H%M%S') }}", + "end_time": "{{ data_interval_end.strftime('%Y%m%d%H%M%S') }}", + }, + "no_version_check": True, +} + +result_map = { + "ls": DbtLSDockerOperator(**base_kwargs), + "run": DbtRunDockerOperator(**base_kwargs), + "test": DbtTestDockerOperator(**base_kwargs), + "deps": DbtDepsDockerOperator(**base_kwargs), + "seed": DbtSeedDockerOperator(**base_kwargs), +} + + +def test_dbt_docker_build_command(): + """ + Since we know that 
the DockerOperator is tested, we can just test that the + command is built correctly. + """ + for command_name, command_operator in result_map.items(): + command_operator.build_command(context=MagicMock(), cmd_flags=MagicMock()) + assert command_operator.command == [ + "dbt", + command_name, + "--project-dir", + "my/dir", + "--vars", + "end_time: '{{ data_interval_end.strftime(''%Y%m%d%H%M%S'') }}'\n" + "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", + "--no-version-check", + ] diff --git a/tests/dbt/test_operators.py b/tests/dbt/operators/test_local.py similarity index 100% rename from tests/dbt/test_operators.py rename to tests/dbt/operators/test_local.py From b6535891d3a14c4bce84da3bfc22ad0107dda741 Mon Sep 17 00:00:00 2001 From: dimerman Date: Sat, 18 Mar 2023 17:48:47 -0700 Subject: [PATCH 25/33] Add kubernetes tests --- .../dbt/core/operators/kubernetes.py | 9 +- tests/dbt/operators/test_kubernetes.py | 121 ++++++++++++++++++ tests/dbt/operators/test_local.py | 2 +- 3 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 tests/dbt/operators/test_kubernetes.py diff --git a/cosmos/providers/dbt/core/operators/kubernetes.py b/cosmos/providers/dbt/core/operators/kubernetes.py index 1daa3023d..49d8509c7 100644 --- a/cosmos/providers/dbt/core/operators/kubernetes.py +++ b/cosmos/providers/dbt/core/operators/kubernetes.py @@ -44,16 +44,17 @@ def build_env_args(self, env: dict) -> list[k8s.V1EnvVar]: self.env_vars = convert_env_vars({**env, **env_vars_dict}) def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None): + self.build_kube_args(cmd_flags, context) + self.log.info(f"Running command: {self.arguments}") + return super().execute(context) + + def build_kube_args(self, cmd_flags, context): dbt_cmd, env_vars = self.build_cmd( context=context, cmd_flags=cmd_flags, handle_profile=False ) - # set env vars self.build_env_args(env_vars) - self.arguments = dbt_cmd - self.log.info(f"Running command: 
{self.arguments}") - return super().execute(context) class DbtLSKubernetesOperator(DbtKubernetesBaseOperator): diff --git a/tests/dbt/operators/test_kubernetes.py b/tests/dbt/operators/test_kubernetes.py new file mode 100644 index 000000000..7e83d589d --- /dev/null +++ b/tests/dbt/operators/test_kubernetes.py @@ -0,0 +1,121 @@ +from pathlib import Path +from unittest.mock import MagicMock, patch + +from airflow.utils.context import Context +from pendulum import datetime + +from cosmos.providers.dbt.core.operators.kubernetes import ( + DbtDepsKubernetesOperator, + DbtKubernetesBaseOperator, + DbtLSKubernetesOperator, + DbtRunKubernetesOperator, + DbtSeedKubernetesOperator, + DbtTestKubernetesOperator, +) + + +def test_dbt_kubernetes_operator_add_global_flags() -> None: + dbt_kube_operator = DbtKubernetesBaseOperator( + conn_id="my_airflow_connection", + task_id="my-task", + image="my_image", + project_dir="my/dir", + vars={ + "start_time": "{{ data_interval_start.strftime('%Y%m%d%H%M%S') }}", + "end_time": "{{ data_interval_end.strftime('%Y%m%d%H%M%S') }}", + }, + no_version_check=True, + ) + assert dbt_kube_operator.add_global_flags() == [ + "--project-dir", + "my/dir", + "--vars", + "end_time: '{{ data_interval_end.strftime(''%Y%m%d%H%M%S'') }}'\n" + "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", + "--no-version-check", + ] + + +@patch("cosmos.providers.dbt.core.operators.base.context_to_airflow_vars") +def test_dbt_kubernetes_operator_get_env(p_context_to_airflow_vars: MagicMock) -> None: + """ + If an end user passes in a + """ + dbt_kube_operator = DbtKubernetesBaseOperator( + conn_id="my_airflow_connection", + task_id="my-task", + image="my_image", + project_dir="my/dir", + ) + dbt_kube_operator.env = { + "start_date": "20220101", + "end_date": "20220102", + "some_path": Path(__file__), + "retries": 3, + ("tuple", "key"): "some_value", + } + p_context_to_airflow_vars.return_value = {"START_DATE": "2023-02-15 12:30:00"} + env = 
dbt_kube_operator.get_env( + Context(execution_date=datetime(2023, 2, 15, 12, 30)), + profile_vars={ + "SNOWFLAKE_USER": "my_user_id", + "SNOWFLAKE_PASSWORD": "supersecure123", + "SNOWFLAKE_ACCOUNT": "my_account", + "SNOWFLAKE_ROLE": None, + "SNOWFLAKE_DATABASE": "my_database", + "SNOWFLAKE_WAREHOUSE": None, + "SNOWFLAKE_SCHEMA": "jaffle_shop", + }, + ) + expected_env = { + "start_date": "20220101", + "end_date": "20220102", + "some_path": Path(__file__), + "START_DATE": "2023-02-15 12:30:00", + "SNOWFLAKE_USER": "my_user_id", + "SNOWFLAKE_PASSWORD": "supersecure123", + "SNOWFLAKE_ACCOUNT": "my_account", + "SNOWFLAKE_DATABASE": "my_database", + "SNOWFLAKE_SCHEMA": "jaffle_shop", + } + assert env == expected_env + + +base_kwargs = { + "conn_id": "my_airflow_connection", + "task_id": "my-task", + "image": "my_image", + "project_dir": "my/dir", + "vars": { + "start_time": "{{ data_interval_start.strftime('%Y%m%d%H%M%S') }}", + "end_time": "{{ data_interval_end.strftime('%Y%m%d%H%M%S') }}", + }, + "no_version_check": True, +} + +result_map = { + "ls": DbtLSKubernetesOperator(**base_kwargs), + "run": DbtRunKubernetesOperator(**base_kwargs), + "test": DbtTestKubernetesOperator(**base_kwargs), + "deps": DbtDepsKubernetesOperator(**base_kwargs), + "seed": DbtSeedKubernetesOperator(**base_kwargs), +} + + +def test_dbt_kubernetes_build_command(): + """ + Since we know that the KubernetesOperator is tested, we can just test that the + command is built correctly and added to the "arguments" parameter. 
+ """ + for command_name, command_operator in result_map.items(): + command_operator.build_kube_args(context=MagicMock(), cmd_flags=MagicMock()) + assert command_operator.arguments == [ + "dbt", + command_name, + "--project-dir", + "my/dir", + "--vars", + "end_time: '{{ data_interval_end.strftime(''%Y%m%d%H%M%S'') }}'\n" + "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", + "--no-version-check", + ] diff --git a/tests/dbt/operators/test_local.py b/tests/dbt/operators/test_local.py index ac2042431..105a5a8bf 100644 --- a/tests/dbt/operators/test_local.py +++ b/tests/dbt/operators/test_local.py @@ -23,7 +23,7 @@ def test_dbt_base_operator_add_global_flags() -> None: ) assert dbt_base_operator.add_global_flags() == [ "--project-dir", - "my/dir", + "/tmp/dbt/dir", "--vars", "end_time: '{{ data_interval_end.strftime(''%Y%m%d%H%M%S'') }}'\n" "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", From c6cd38ad53b96fc2b2430961f40b0f23dd66d470 Mon Sep 17 00:00:00 2001 From: dimerman Date: Sun, 19 Mar 2023 10:12:08 -0700 Subject: [PATCH 26/33] Add full pod k8s tests --- tests/dbt/operators/test_kubernetes.py | 111 +++++++++++++++++++++++++ 1 file changed, 111 insertions(+) diff --git a/tests/dbt/operators/test_kubernetes.py b/tests/dbt/operators/test_kubernetes.py index 7e83d589d..5d5151a54 100644 --- a/tests/dbt/operators/test_kubernetes.py +++ b/tests/dbt/operators/test_kubernetes.py @@ -119,3 +119,114 @@ def test_dbt_kubernetes_build_command(): "start_time: '{{ data_interval_start.strftime(''%Y%m%d%H%M%S'') }}'\n", "--no-version-check", ] + + +@patch( + "airflow.providers.cncf.kubernetes.operators.kubernetes_pod.KubernetesPodOperator.hook" +) +def test_created_pod(test_hook): + test_hook.is_in_cluster = False + test_hook._get_namespace.return_value.to_dict.return_value = "foo" + ls_operator = DbtLSKubernetesOperator(**base_kwargs) + ls_operator.build_kube_args(context=MagicMock(), cmd_flags=MagicMock()) + pod_obj = 
ls_operator.build_pod_request_obj() + expected_result = { + "api_version": "v1", + "kind": "Pod", + "metadata": { + "annotations": {}, + "cluster_name": None, + "creation_timestamp": None, + "deletion_grace_period_seconds": None, + "deletion_timestamp": None, + "finalizers": None, + "generate_name": None, + "generation": None, + "labels": {"airflow_kpo_in_cluster": "False", "airflow_version": "2.5.1"}, + "managed_fields": None, + "name": pod_obj.metadata.name, + "namespace": "foo", + "owner_references": None, + "resource_version": None, + "self_link": None, + "uid": None, + }, + "spec": { + "active_deadline_seconds": None, + "affinity": {}, + "automount_service_account_token": None, + "containers": [ + { + "args": [ + "dbt", + "ls", + "--project-dir", + "my/dir", + "--vars", + "end_time: '{{ " + "data_interval_end.strftime(''%Y%m%d%H%M%S'') " + "}}'\n" + "start_time: '{{ " + "data_interval_start.strftime(''%Y%m%d%H%M%S'') " + "}}'\n", + "--no-version-check", + ], + "command": [], + "env": [], + "env_from": [], + "image": "my_image", + "image_pull_policy": None, + "lifecycle": None, + "liveness_probe": None, + "name": "base", + "ports": [], + "readiness_probe": None, + "resources": None, + "security_context": None, + "startup_probe": None, + "stdin": None, + "stdin_once": None, + "termination_message_path": None, + "termination_message_policy": None, + "tty": None, + "volume_devices": None, + "volume_mounts": [], + "working_dir": None, + } + ], + "dns_config": None, + "dns_policy": None, + "enable_service_links": None, + "ephemeral_containers": None, + "host_aliases": None, + "host_ipc": None, + "host_network": False, + "host_pid": None, + "hostname": None, + "image_pull_secrets": [], + "init_containers": [], + "node_name": None, + "node_selector": {}, + "os": None, + "overhead": None, + "preemption_policy": None, + "priority": None, + "priority_class_name": None, + "readiness_gates": None, + "restart_policy": "Never", + "runtime_class_name": None, + 
"scheduler_name": None, + "security_context": {}, + "service_account": None, + "service_account_name": None, + "set_hostname_as_fqdn": None, + "share_process_namespace": None, + "subdomain": None, + "termination_grace_period_seconds": None, + "tolerations": [], + "topology_spread_constraints": None, + "volumes": [], + }, + "status": None, + } + assert pod_obj.to_dict() == expected_result From 8460df23a8a511fd6eb53323f388f969af83b129 Mon Sep 17 00:00:00 2001 From: dimerman Date: Sun, 19 Mar 2023 17:57:37 -0700 Subject: [PATCH 27/33] Add environment variable to kubernetes test --- tests/dbt/operators/test_kubernetes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/dbt/operators/test_kubernetes.py b/tests/dbt/operators/test_kubernetes.py index 5d5151a54..626b430a1 100644 --- a/tests/dbt/operators/test_kubernetes.py +++ b/tests/dbt/operators/test_kubernetes.py @@ -127,7 +127,9 @@ def test_dbt_kubernetes_build_command(): def test_created_pod(test_hook): test_hook.is_in_cluster = False test_hook._get_namespace.return_value.to_dict.return_value = "foo" - ls_operator = DbtLSKubernetesOperator(**base_kwargs) + ls_kwargs = {"env_vars": {"FOO": "BAR"}} + ls_kwargs.update(base_kwargs) + ls_operator = DbtLSKubernetesOperator(**ls_kwargs) ls_operator.build_kube_args(context=MagicMock(), cmd_flags=MagicMock()) pod_obj = ls_operator.build_pod_request_obj() expected_result = { @@ -172,7 +174,7 @@ def test_created_pod(test_hook): "--no-version-check", ], "command": [], - "env": [], + "env": [{"name": "FOO", "value": "BAR", "value_from": None}], "env_from": [], "image": "my_image", "image_pull_policy": None, From 4f4f708ea8edf375b014125a39ae70e87452afc8 Mon Sep 17 00:00:00 2001 From: dimerman Date: Sun, 19 Mar 2023 17:59:54 -0700 Subject: [PATCH 28/33] Fix to airflow version in kubernetes test --- tests/dbt/operators/test_kubernetes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/dbt/operators/test_kubernetes.py 
b/tests/dbt/operators/test_kubernetes.py index 626b430a1..3b11827f1 100644 --- a/tests/dbt/operators/test_kubernetes.py +++ b/tests/dbt/operators/test_kubernetes.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest import mock from unittest.mock import MagicMock, patch from airflow.utils.context import Context @@ -144,7 +145,7 @@ def test_created_pod(test_hook): "finalizers": None, "generate_name": None, "generation": None, - "labels": {"airflow_kpo_in_cluster": "False", "airflow_version": "2.5.1"}, + "labels": {"airflow_kpo_in_cluster": "False", "airflow_version": mock.ANY}, "managed_fields": None, "name": pod_obj.metadata.name, "namespace": "foo", From 5da361b97bed14827eb8ebfa0fe1f7aa6cf634d7 Mon Sep 17 00:00:00 2001 From: dimerman Date: Mon, 20 Mar 2023 08:10:56 -0700 Subject: [PATCH 29/33] Fix build_docs to point to correct path with new classes included in autodoc --- cosmos/providers/dbt/core/utils/__init__.py | 0 docs/Makefile | 2 +- docs/conf.py | 4 +--- .../cosmos.providers.dbt.DbtDag.rst | 12 +---------- ...smos.providers.dbt.DbtLSDockerOperator.rst | 6 ++++++ ....providers.dbt.DbtLSKubernetesOperator.rst | 6 ++++++ ...osmos.providers.dbt.DbtLSLocalOperator.rst | 6 ++++++ .../cosmos.providers.dbt.DbtLSOperator.rst | 3 ++- ...mos.providers.dbt.DbtRunDockerOperator.rst | 6 ++++++ ...providers.dbt.DbtRunKubernetesOperator.rst | 6 ++++++ ...smos.providers.dbt.DbtRunLocalOperator.rst | 6 ++++++ ...ders.dbt.DbtRunOperationDockerOperator.rst | 6 ++++++ ....dbt.DbtRunOperationKubernetesOperator.rst | 6 ++++++ ...iders.dbt.DbtRunOperationLocalOperator.rst | 6 ++++++ ....providers.dbt.DbtRunOperationOperator.rst | 3 ++- .../cosmos.providers.dbt.DbtRunOperator.rst | 3 ++- ...os.providers.dbt.DbtSeedDockerOperator.rst | 6 ++++++ ...roviders.dbt.DbtSeedKubernetesOperator.rst | 6 ++++++ ...mos.providers.dbt.DbtSeedLocalOperator.rst | 6 ++++++ .../cosmos.providers.dbt.DbtSeedOperator.rst | 3 ++- .../cosmos.providers.dbt.DbtTaskGroup.rst | 12 +---------- 
...os.providers.dbt.DbtTestDockerOperator.rst | 6 ++++++ ...roviders.dbt.DbtTestKubernetesOperator.rst | 6 ++++++ ...mos.providers.dbt.DbtTestLocalOperator.rst | 6 ++++++ .../cosmos.providers.dbt.DbtTestOperator.rst | 3 ++- docs/reference/dbt-reference.rst | 20 ++++++++++++++----- docs/reference/index.rst | 2 +- 27 files changed, 120 insertions(+), 37 deletions(-) create mode 100644 cosmos/providers/dbt/core/utils/__init__.py create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst create mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst diff --git a/cosmos/providers/dbt/core/utils/__init__.py b/cosmos/providers/dbt/core/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff 
--git a/docs/Makefile b/docs/Makefile index d4bb2cbb9..5f473ab77 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -v SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build diff --git a/docs/conf.py b/docs/conf.py index 5d06bd231..71c7da42e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,9 +28,7 @@ autosummary_generate = True add_module_names = False -autodoc_mock_imports = [ - "airflow", -] +autodoc_mock_imports = ["airflow", "cosmos.providers.dbt"] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst index 98a4649a7..733cf6543 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst @@ -3,14 +3,4 @@ .. currentmodule:: cosmos.providers.dbt -.. autoclass:: DbtDag - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtDag.__init__ +.. autodata:: DbtDag diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst new file mode 100644 index 000000000..fdadb8077 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtLSDockerOperator +======================================== + +.. currentmodule:: cosmos.providers.dbt + +.. 
autodata:: DbtLSDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst new file mode 100644 index 000000000..412f003f1 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtLSKubernetesOperator +============================================ + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtLSKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst new file mode 100644 index 000000000..1de25e24a --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtLSLocalOperator +======================================= + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtLSLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst index f1334feca..e338da591 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst @@ -18,7 +18,6 @@ ~DbtLSOperator.build_and_run_cmd ~DbtLSOperator.exception_handling ~DbtLSOperator.execute - ~DbtLSOperator.get_dbt_path ~DbtLSOperator.get_env ~DbtLSOperator.on_kill ~DbtLSOperator.run_command @@ -32,5 +31,7 @@ .. 
autosummary:: + ~DbtLSOperator.global_boolean_flags + ~DbtLSOperator.global_flags ~DbtLSOperator.template_fields ~DbtLSOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst new file mode 100644 index 000000000..038b05417 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunDockerOperator +========================================= + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtRunDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst new file mode 100644 index 000000000..6593584d9 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunKubernetesOperator +============================================= + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtRunKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst new file mode 100644 index 000000000..b47d88644 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunLocalOperator +======================================== + +.. currentmodule:: cosmos.providers.dbt + +.. 
autodata:: DbtRunLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst new file mode 100644 index 000000000..558568f3a --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunOperationDockerOperator +================================================== + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtRunOperationDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst new file mode 100644 index 000000000..470e60aae --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunOperationKubernetesOperator +====================================================== + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtRunOperationKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst new file mode 100644 index 000000000..b6fc16916 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtRunOperationLocalOperator +================================================= + +.. currentmodule:: cosmos.providers.dbt + +.. 
autodata:: DbtRunOperationLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst index d537f6430..c9fb93550 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst @@ -19,7 +19,6 @@ ~DbtRunOperationOperator.build_and_run_cmd ~DbtRunOperationOperator.exception_handling ~DbtRunOperationOperator.execute - ~DbtRunOperationOperator.get_dbt_path ~DbtRunOperationOperator.get_env ~DbtRunOperationOperator.on_kill ~DbtRunOperationOperator.run_command @@ -33,5 +32,7 @@ .. autosummary:: + ~DbtRunOperationOperator.global_boolean_flags + ~DbtRunOperationOperator.global_flags ~DbtRunOperationOperator.template_fields ~DbtRunOperationOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst index e15f62398..c2e0590cf 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst @@ -18,7 +18,6 @@ ~DbtRunOperator.build_and_run_cmd ~DbtRunOperator.exception_handling ~DbtRunOperator.execute - ~DbtRunOperator.get_dbt_path ~DbtRunOperator.get_env ~DbtRunOperator.on_kill ~DbtRunOperator.run_command @@ -32,6 +31,8 @@ .. 
autosummary:: + ~DbtRunOperator.global_boolean_flags + ~DbtRunOperator.global_flags ~DbtRunOperator.template_fields ~DbtRunOperator.ui_color ~DbtRunOperator.ui_fgcolor diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst new file mode 100644 index 000000000..c9be1e047 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtSeedDockerOperator +========================================== + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtSeedDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst new file mode 100644 index 000000000..375dcb712 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtSeedKubernetesOperator +============================================== + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtSeedKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst new file mode 100644 index 000000000..fec2fcae9 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtSeedLocalOperator +========================================= + +.. currentmodule:: cosmos.providers.dbt + +.. 
autodata:: DbtSeedLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst index c198224b6..b662ff7f2 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst @@ -19,7 +19,6 @@ ~DbtSeedOperator.build_and_run_cmd ~DbtSeedOperator.exception_handling ~DbtSeedOperator.execute - ~DbtSeedOperator.get_dbt_path ~DbtSeedOperator.get_env ~DbtSeedOperator.on_kill ~DbtSeedOperator.run_command @@ -33,5 +32,7 @@ .. autosummary:: + ~DbtSeedOperator.global_boolean_flags + ~DbtSeedOperator.global_flags ~DbtSeedOperator.template_fields ~DbtSeedOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst index e0c95e560..6f71220b5 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst @@ -3,14 +3,4 @@ .. currentmodule:: cosmos.providers.dbt -.. autoclass:: DbtTaskGroup - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtTaskGroup.__init__ +.. autodata:: DbtTaskGroup diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst new file mode 100644 index 000000000..f96dc087b --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtTestDockerOperator +========================================== + +.. currentmodule:: cosmos.providers.dbt + +.. 
autodata:: DbtTestDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst new file mode 100644 index 000000000..bff212df5 --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtTestKubernetesOperator +============================================== + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtTestKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst new file mode 100644 index 000000000..e4acb310e --- /dev/null +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst @@ -0,0 +1,6 @@ +cosmos.providers.dbt.DbtTestLocalOperator +========================================= + +.. currentmodule:: cosmos.providers.dbt + +.. autodata:: DbtTestLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst index 4abdddae4..43424acfa 100644 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst +++ b/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst @@ -18,7 +18,6 @@ ~DbtTestOperator.build_and_run_cmd ~DbtTestOperator.exception_handling ~DbtTestOperator.execute - ~DbtTestOperator.get_dbt_path ~DbtTestOperator.get_env ~DbtTestOperator.on_kill ~DbtTestOperator.run_command @@ -32,5 +31,7 @@ .. 
autosummary:: + ~DbtTestOperator.global_boolean_flags + ~DbtTestOperator.global_flags ~DbtTestOperator.template_fields ~DbtTestOperator.ui_color diff --git a/docs/reference/dbt-reference.rst b/docs/reference/dbt-reference.rst index e44238d07..afeb973ed 100644 --- a/docs/reference/dbt-reference.rst +++ b/docs/reference/dbt-reference.rst @@ -23,8 +23,18 @@ Operators :toctree: _generated/ :caption: Operators - DbtRunOperator - DbtTestOperator - DbtLSOperator - DbtSeedOperator - DbtRunOperationOperator + DbtLSLocalOperator, + DbtRunOperationLocalOperator, + DbtRunLocalOperator, + DbtSeedLocalOperator, + DbtTestLocalOperator, + DbtLSDockerOperator, + DbtRunOperationDockerOperator, + DbtRunDockerOperator, + DbtSeedDockerOperator, + DbtTestDockerOperator, + DbtLSKubernetesOperator, + DbtRunOperationKubernetesOperator, + DbtRunKubernetesOperator, + DbtSeedKubernetesOperator, + DbtTestKubernetesOperator, diff --git a/docs/reference/index.rst b/docs/reference/index.rst index aa7a54486..1471e2daf 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -2,6 +2,6 @@ API Reference ============= .. toctree:: - :maxdepth: 3 + :maxdepth: 5 dbt From 02735e82b9589b16f65623ed3c2f37ad44cbbc8c Mon Sep 17 00:00:00 2001 From: dimerman Date: Mon, 20 Mar 2023 09:03:42 -0700 Subject: [PATCH 30/33] Add airflow_version to test since mock.ANY seems to cause a failure --- docs/Makefile | 2 +- tests/dbt/operators/test_kubernetes.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index 5f473ab77..d4bb2cbb9 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= -v +SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . 
BUILDDIR = _build diff --git a/tests/dbt/operators/test_kubernetes.py b/tests/dbt/operators/test_kubernetes.py index 3b11827f1..8d7cea36a 100644 --- a/tests/dbt/operators/test_kubernetes.py +++ b/tests/dbt/operators/test_kubernetes.py @@ -1,5 +1,4 @@ from pathlib import Path -from unittest import mock from unittest.mock import MagicMock, patch from airflow.utils.context import Context @@ -145,7 +144,10 @@ def test_created_pod(test_hook): "finalizers": None, "generate_name": None, "generation": None, - "labels": {"airflow_kpo_in_cluster": "False", "airflow_version": mock.ANY}, + "labels": { + "airflow_kpo_in_cluster": "False", + "airflow_version": pod_obj.metadata.labels["airflow_version"], + }, "managed_fields": None, "name": pod_obj.metadata.name, "namespace": "foo", From 38b93c1e91c85d6c1c9b5aac39171fdcbc89b5c1 Mon Sep 17 00:00:00 2001 From: dimerman Date: Mon, 20 Mar 2023 10:09:20 -0700 Subject: [PATCH 31/33] Manually set the dbt executable path, since we will assume the command is 'dbt' on a docker image (this can be changed in a future ticket) --- cosmos/providers/dbt/core/operators/docker.py | 4 ++++ cosmos/providers/dbt/core/operators/kubernetes.py | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/cosmos/providers/dbt/core/operators/docker.py b/cosmos/providers/dbt/core/operators/docker.py index 2cd768c51..a354b2b7c 100644 --- a/cosmos/providers/dbt/core/operators/docker.py +++ b/cosmos/providers/dbt/core/operators/docker.py @@ -37,6 +37,10 @@ def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None return super().execute(context) def build_command(self, cmd_flags, context): + # For the first round, we're going to assume that the command is dbt + # This means that we don't have openlineage support, but we will create a ticket + # to add that in the future + self.dbt_executable_path = "dbt" dbt_cmd, env_vars = self.build_cmd( context=context, cmd_flags=cmd_flags, handle_profile=False ) diff --git 
a/cosmos/providers/dbt/core/operators/kubernetes.py b/cosmos/providers/dbt/core/operators/kubernetes.py index 49d8509c7..d12224a28 100644 --- a/cosmos/providers/dbt/core/operators/kubernetes.py +++ b/cosmos/providers/dbt/core/operators/kubernetes.py @@ -49,6 +49,10 @@ def build_and_run_cmd(self, context: Context, cmd_flags: list[str] | None = None return super().execute(context) def build_kube_args(self, cmd_flags, context): + # For the first round, we're going to assume that the command is dbt + # This means that we don't have openlineage support, but we will create a ticket + # to add that in the future + self.dbt_executable_path = "dbt" dbt_cmd, env_vars = self.build_cmd( context=context, cmd_flags=cmd_flags, handle_profile=False ) From fe231a59bc9796039b7404a1d81fc0945cdc5f13 Mon Sep 17 00:00:00 2001 From: dimerman Date: Mon, 20 Mar 2023 10:17:44 -0700 Subject: [PATCH 32/33] doc fix 1 --- docs/conf.py | 2 +- docs/contributing.rst | 2 +- .../cosmos.providers.dbt.DbtLSOperator.rst | 37 ------------------ ....providers.dbt.DbtRunOperationOperator.rst | 38 ------------------- .../cosmos.providers.dbt.DbtRunOperator.rst | 38 ------------------- .../cosmos.providers.dbt.DbtSeedOperator.rst | 38 ------------------- .../cosmos.providers.dbt.DbtTestOperator.rst | 37 ------------------ 7 files changed, 2 insertions(+), 190 deletions(-) delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst diff --git a/docs/conf.py b/docs/conf.py index 71c7da42e..4df234269 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,7 +28,7 @@ autosummary_generate = True add_module_names = False -autodoc_mock_imports = 
["airflow", "cosmos.providers.dbt"] +autodoc_mock_imports = ["airflow"] templates_path = ["_templates"] exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] diff --git a/docs/contributing.rst b/docs/contributing.rst index f6a332dca..19354eea0 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -25,7 +25,7 @@ Pre-requisites #. `git `_ Local Sandbox -************ +************* To create a sandbox where you can do real-time testing for your proposed to changes to Cosmos, see the corresponding development repository: `cosmos-dev `_. diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst deleted file mode 100644 index e338da591..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtLSOperator.rst +++ /dev/null @@ -1,37 +0,0 @@ -cosmos.providers.dbt.DbtLSOperator -================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autoclass:: DbtLSOperator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtLSOperator.__init__ - ~DbtLSOperator.add_global_flags - ~DbtLSOperator.build_and_run_cmd - ~DbtLSOperator.exception_handling - ~DbtLSOperator.execute - ~DbtLSOperator.get_env - ~DbtLSOperator.on_kill - ~DbtLSOperator.run_command - ~DbtLSOperator.subprocess_hook - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DbtLSOperator.global_boolean_flags - ~DbtLSOperator.global_flags - ~DbtLSOperator.template_fields - ~DbtLSOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst deleted file mode 100644 index c9fb93550..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationOperator.rst +++ /dev/null @@ -1,38 +0,0 @@ -cosmos.providers.dbt.DbtRunOperationOperator -============================================ - -.. 
currentmodule:: cosmos.providers.dbt - -.. autoclass:: DbtRunOperationOperator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtRunOperationOperator.__init__ - ~DbtRunOperationOperator.add_cmd_flags - ~DbtRunOperationOperator.add_global_flags - ~DbtRunOperationOperator.build_and_run_cmd - ~DbtRunOperationOperator.exception_handling - ~DbtRunOperationOperator.execute - ~DbtRunOperationOperator.get_env - ~DbtRunOperationOperator.on_kill - ~DbtRunOperationOperator.run_command - ~DbtRunOperationOperator.subprocess_hook - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DbtRunOperationOperator.global_boolean_flags - ~DbtRunOperationOperator.global_flags - ~DbtRunOperationOperator.template_fields - ~DbtRunOperationOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst deleted file mode 100644 index c2e0590cf..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperator.rst +++ /dev/null @@ -1,38 +0,0 @@ -cosmos.providers.dbt.DbtRunOperator -=================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autoclass:: DbtRunOperator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtRunOperator.__init__ - ~DbtRunOperator.add_global_flags - ~DbtRunOperator.build_and_run_cmd - ~DbtRunOperator.exception_handling - ~DbtRunOperator.execute - ~DbtRunOperator.get_env - ~DbtRunOperator.on_kill - ~DbtRunOperator.run_command - ~DbtRunOperator.subprocess_hook - - - - - - .. rubric:: Attributes - - .. 
autosummary:: - - ~DbtRunOperator.global_boolean_flags - ~DbtRunOperator.global_flags - ~DbtRunOperator.template_fields - ~DbtRunOperator.ui_color - ~DbtRunOperator.ui_fgcolor diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst deleted file mode 100644 index b662ff7f2..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedOperator.rst +++ /dev/null @@ -1,38 +0,0 @@ -cosmos.providers.dbt.DbtSeedOperator -==================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autoclass:: DbtSeedOperator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. autosummary:: - - ~DbtSeedOperator.__init__ - ~DbtSeedOperator.add_cmd_flags - ~DbtSeedOperator.add_global_flags - ~DbtSeedOperator.build_and_run_cmd - ~DbtSeedOperator.exception_handling - ~DbtSeedOperator.execute - ~DbtSeedOperator.get_env - ~DbtSeedOperator.on_kill - ~DbtSeedOperator.run_command - ~DbtSeedOperator.subprocess_hook - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DbtSeedOperator.global_boolean_flags - ~DbtSeedOperator.global_flags - ~DbtSeedOperator.template_fields - ~DbtSeedOperator.ui_color diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst deleted file mode 100644 index 43424acfa..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTestOperator.rst +++ /dev/null @@ -1,37 +0,0 @@ -cosmos.providers.dbt.DbtTestOperator -==================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autoclass:: DbtTestOperator - - - .. automethod:: __init__ - - - .. rubric:: Methods - - .. 
autosummary:: - - ~DbtTestOperator.__init__ - ~DbtTestOperator.add_global_flags - ~DbtTestOperator.build_and_run_cmd - ~DbtTestOperator.exception_handling - ~DbtTestOperator.execute - ~DbtTestOperator.get_env - ~DbtTestOperator.on_kill - ~DbtTestOperator.run_command - ~DbtTestOperator.subprocess_hook - - - - - - .. rubric:: Attributes - - .. autosummary:: - - ~DbtTestOperator.global_boolean_flags - ~DbtTestOperator.global_flags - ~DbtTestOperator.template_fields - ~DbtTestOperator.ui_color From f48ed09cbc1eb29102ce23229ac34959f2d969df Mon Sep 17 00:00:00 2001 From: dimerman Date: Mon, 20 Mar 2023 11:03:05 -0700 Subject: [PATCH 33/33] switch to using autoapi for generating reference docs --- cosmos/core/__init__.py | 0 cosmos/core/graph/__init__.py | 0 cosmos/providers/dbt/__init__.py | 12 +- docs/Makefile | 2 +- docs/autoapi/cosmos/core/airflow/index.rst | 62 +++++ .../cosmos/core/graph/entities/index.rst | 94 ++++++++ docs/autoapi/cosmos/core/graph/index.rst | 13 ++ docs/autoapi/cosmos/core/index.rst | 22 ++ docs/autoapi/cosmos/index.rst | 26 +++ .../cosmos/providers/dbt/constants/index.rst | 10 + .../cosmos/providers/dbt/core/index.rst | 15 ++ .../dbt/core/operators/base/index.rst | 110 +++++++++ .../dbt/core/operators/docker/index.rst | 201 ++++++++++++++++ .../providers/dbt/core/operators/index.rst | 16 ++ .../dbt/core/operators/kubernetes/index.rst | 186 +++++++++++++++ .../dbt/core/operators/local/index.rst | 220 ++++++++++++++++++ .../dbt/core/profiles/bigquery/index.rst | 35 +++ .../dbt/core/profiles/databricks/index.rst | 44 ++++ .../providers/dbt/core/profiles/index.rst | 130 +++++++++++ .../dbt/core/profiles/postgres/index.rst | 35 +++ .../dbt/core/profiles/redshift/index.rst | 35 +++ .../dbt/core/profiles/snowflake/index.rst | 39 ++++ .../utils/data_aware_scheduling/index.rst | 20 ++ .../dbt/core/utils/file_syncing/index.rst | 40 ++++ .../cosmos/providers/dbt/core/utils/index.rst | 15 ++ .../core/utils/profiles_generator/index.rst | 44 ++++ 
.../cosmos/providers/dbt/dag/index.rst | 44 ++++ docs/autoapi/cosmos/providers/dbt/index.rst | 31 +++ .../cosmos/providers/dbt/parser/index.rst | 13 ++ .../providers/dbt/parser/project/index.rst | 153 ++++++++++++ .../cosmos/providers/dbt/render/index.rst | 56 +++++ .../cosmos/providers/dbt/task_group/index.rst | 44 ++++ docs/autoapi/cosmos/providers/index.rst | 13 ++ docs/autoapi/index.rst | 11 + docs/conf.py | 7 +- docs/index.rst | 2 +- .../cosmos.providers.dbt.DbtDag.rst | 6 - ...smos.providers.dbt.DbtLSDockerOperator.rst | 6 - ....providers.dbt.DbtLSKubernetesOperator.rst | 6 - ...osmos.providers.dbt.DbtLSLocalOperator.rst | 6 - ...mos.providers.dbt.DbtRunDockerOperator.rst | 6 - ...providers.dbt.DbtRunKubernetesOperator.rst | 6 - ...smos.providers.dbt.DbtRunLocalOperator.rst | 6 - ...ders.dbt.DbtRunOperationDockerOperator.rst | 6 - ....dbt.DbtRunOperationKubernetesOperator.rst | 6 - ...iders.dbt.DbtRunOperationLocalOperator.rst | 6 - ...os.providers.dbt.DbtSeedDockerOperator.rst | 6 - ...roviders.dbt.DbtSeedKubernetesOperator.rst | 6 - ...mos.providers.dbt.DbtSeedLocalOperator.rst | 6 - .../cosmos.providers.dbt.DbtTaskGroup.rst | 6 - ...os.providers.dbt.DbtTestDockerOperator.rst | 6 - ...roviders.dbt.DbtTestKubernetesOperator.rst | 6 - ...mos.providers.dbt.DbtTestLocalOperator.rst | 6 - docs/reference/dbt-reference.rst | 40 ---- docs/reference/index.rst | 7 - pyproject.toml | 1 + 56 files changed, 1789 insertions(+), 161 deletions(-) create mode 100644 cosmos/core/__init__.py create mode 100644 cosmos/core/graph/__init__.py create mode 100644 docs/autoapi/cosmos/core/airflow/index.rst create mode 100644 docs/autoapi/cosmos/core/graph/entities/index.rst create mode 100644 docs/autoapi/cosmos/core/graph/index.rst create mode 100644 docs/autoapi/cosmos/core/index.rst create mode 100644 docs/autoapi/cosmos/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/constants/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/index.rst create 
mode 100644 docs/autoapi/cosmos/providers/dbt/core/operators/base/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/operators/docker/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/operators/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/operators/kubernetes/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/operators/local/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/bigquery/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/databricks/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/postgres/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/redshift/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/profiles/snowflake/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/utils/data_aware_scheduling/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/utils/file_syncing/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/utils/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/core/utils/profiles_generator/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/dag/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/parser/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/parser/project/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/render/index.rst create mode 100644 docs/autoapi/cosmos/providers/dbt/task_group/index.rst create mode 100644 docs/autoapi/cosmos/providers/index.rst create mode 100644 docs/autoapi/index.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst delete mode 100644 
docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst delete mode 100644 docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst delete mode 100644 docs/reference/dbt-reference.rst delete mode 100644 docs/reference/index.rst diff --git a/cosmos/core/__init__.py b/cosmos/core/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cosmos/core/graph/__init__.py b/cosmos/core/graph/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/cosmos/providers/dbt/__init__.py b/cosmos/providers/dbt/__init__.py index ee8ffd3db..a015e104a 100644 --- a/cosmos/providers/dbt/__init__.py +++ b/cosmos/providers/dbt/__init__.py @@ -2,14 +2,14 @@ dbt support for Airflow. Contains dags, task groups, and operators. 
""" -from .core.operators.docker import ( +from cosmos.providers.dbt.core.operators.docker import ( DbtLSDockerOperator, DbtRunDockerOperator, DbtRunOperationDockerOperator, DbtSeedDockerOperator, DbtTestDockerOperator, ) -from .core.operators.kubernetes import ( +from cosmos.providers.dbt.core.operators.kubernetes import ( DbtLSKubernetesOperator, DbtRunKubernetesOperator, DbtRunOperationKubernetesOperator, @@ -18,7 +18,7 @@ ) # re-export the operators -from .core.operators.local import ( +from cosmos.providers.dbt.core.operators.local import ( DbtLSLocalOperator, DbtRunLocalOperator, DbtRunOperationLocalOperator, @@ -27,11 +27,11 @@ ) # re-export user facing utilities -from .core.utils.data_aware_scheduling import get_dbt_dataset +from cosmos.providers.dbt.core.utils.data_aware_scheduling import get_dbt_dataset # re-export the dag and task group -from .dag import DbtDag -from .task_group import DbtTaskGroup +from cosmos.providers.dbt.dag import DbtDag +from cosmos.providers.dbt.task_group import DbtTaskGroup __all__ = [ DbtLSLocalOperator, diff --git a/docs/Makefile b/docs/Makefile index d4bb2cbb9..5f473ab77 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= +SPHINXOPTS ?= -v SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build diff --git a/docs/autoapi/cosmos/core/airflow/index.rst b/docs/autoapi/cosmos/core/airflow/index.rst new file mode 100644 index 000000000..4331aa163 --- /dev/null +++ b/docs/autoapi/cosmos/core/airflow/index.rst @@ -0,0 +1,62 @@ +:py:mod:`cosmos.core.airflow` +============================= + +.. py:module:: cosmos.core.airflow + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.core.airflow.CosmosDag + cosmos.core.airflow.CosmosTaskGroup + + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.core.airflow.get_airflow_task + + + +Attributes +~~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.core.airflow.logger + + +.. py:data:: logger + + + +.. py:class:: CosmosDag(cosmos_group: cosmos.core.graph.entities.Group, *args: Any, **kwargs: Any) + + Bases: :py:obj:`airflow.models.dag.DAG` + + Render a Group as an Airflow DAG. Subclass of Airflow DAG. + + +.. py:class:: CosmosTaskGroup(cosmos_group: cosmos.core.graph.entities.Group, dag: Optional[airflow.models.dag.DAG] = None, *args: Any, **kwargs: Any) + + Bases: :py:obj:`airflow.utils.task_group.TaskGroup` + + Render a Group as an Airflow TaskGroup. Subclass of Airflow TaskGroup. + + +.. py:function:: get_airflow_task(task: cosmos.core.graph.entities.Task, dag: airflow.models.dag.DAG, task_group: Optional[airflow.utils.task_group.TaskGroup] = None) -> airflow.models.BaseOperator + + Get the Airflow Operator class for a Task. + + :param task: The Task to get the Operator for + + :return: The Operator class + :rtype: BaseOperator diff --git a/docs/autoapi/cosmos/core/graph/entities/index.rst b/docs/autoapi/cosmos/core/graph/entities/index.rst new file mode 100644 index 000000000..b8b349410 --- /dev/null +++ b/docs/autoapi/cosmos/core/graph/entities/index.rst @@ -0,0 +1,94 @@ +:py:mod:`cosmos.core.graph.entities` +==================================== + +.. py:module:: cosmos.core.graph.entities + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.core.graph.entities.CosmosEntity + cosmos.core.graph.entities.Group + cosmos.core.graph.entities.Task + + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.core.graph.entities.logger + + +.. py:data:: logger + + + +.. py:class:: CosmosEntity + + A CosmosEntity defines a base class for all Cosmos entities. + + :param id: The human-readable, unique identifier of the entity + :param upstream_entity_ids: The ids of the entities that this entity is upstream of + + .. py:attribute:: id + :type: str + + + + .. py:attribute:: upstream_entity_ids + :type: List[str] + + + + .. 
py:method:: add_upstream(entity: CosmosEntity) -> None + + Add an upstream entity to the entity. + + :param entity: The entity to add + + + +.. py:class:: Group + + Bases: :py:obj:`CosmosEntity` + + A Group represents a collection of entities that are connected by dependencies. + + .. py:attribute:: entities + :type: List[CosmosEntity] + + + + .. py:method:: add_entity(entity: CosmosEntity) -> None + + Add an entity to the group. + + :param entity: The entity to add + + + +.. py:class:: Task + + Bases: :py:obj:`CosmosEntity` + + A task represents a single node in the DAG. + + :param operator_class: The name of the operator class to use for this task + :param arguments: The arguments to pass to the operator + + .. py:attribute:: operator_class + :type: str + :value: 'airflow.operators.dummy.DummyOperator' + + + + .. py:attribute:: arguments + :type: Dict[str, Any] diff --git a/docs/autoapi/cosmos/core/graph/index.rst b/docs/autoapi/cosmos/core/graph/index.rst new file mode 100644 index 000000000..b225c4c4c --- /dev/null +++ b/docs/autoapi/cosmos/core/graph/index.rst @@ -0,0 +1,13 @@ +:py:mod:`cosmos.core.graph` +=========================== + +.. py:module:: cosmos.core.graph + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + entities/index.rst diff --git a/docs/autoapi/cosmos/core/index.rst b/docs/autoapi/cosmos/core/index.rst new file mode 100644 index 000000000..e58478083 --- /dev/null +++ b/docs/autoapi/cosmos/core/index.rst @@ -0,0 +1,22 @@ +:py:mod:`cosmos.core` +===================== + +.. py:module:: cosmos.core + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + graph/index.rst + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + airflow/index.rst diff --git a/docs/autoapi/cosmos/index.rst b/docs/autoapi/cosmos/index.rst new file mode 100644 index 000000000..12a103e13 --- /dev/null +++ b/docs/autoapi/cosmos/index.rst @@ -0,0 +1,26 @@ +:py:mod:`cosmos` +================ + +.. 
py:module:: cosmos + +.. autoapi-nested-parse:: + + Astronomer Cosmos is a library for rendering 3rd party workflows in Airflow. + + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + core/index.rst + providers/index.rst + + +Package Contents +---------------- + +.. py:data:: __version__ + :value: '0.4.3dev' diff --git a/docs/autoapi/cosmos/providers/dbt/constants/index.rst b/docs/autoapi/cosmos/providers/dbt/constants/index.rst new file mode 100644 index 000000000..963d56641 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/constants/index.rst @@ -0,0 +1,10 @@ +:py:mod:`cosmos.providers.dbt.constants` +======================================== + +.. py:module:: cosmos.providers.dbt.constants + + +Module Contents +--------------- + +.. py:data:: DBT_PROFILE_PATH diff --git a/docs/autoapi/cosmos/providers/dbt/core/index.rst b/docs/autoapi/cosmos/providers/dbt/core/index.rst new file mode 100644 index 000000000..d299150d4 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/index.rst @@ -0,0 +1,15 @@ +:py:mod:`cosmos.providers.dbt.core` +=================================== + +.. py:module:: cosmos.providers.dbt.core + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + operators/index.rst + profiles/index.rst + utils/index.rst diff --git a/docs/autoapi/cosmos/providers/dbt/core/operators/base/index.rst b/docs/autoapi/cosmos/providers/dbt/core/operators/base/index.rst new file mode 100644 index 000000000..aed90aacb --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/operators/base/index.rst @@ -0,0 +1,110 @@ +:py:mod:`cosmos.providers.dbt.core.operators.base` +================================================== + +.. py:module:: cosmos.providers.dbt.core.operators.base + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.operators.base.DbtBaseOperator + + + + +Attributes +~~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.operators.base.logger + + +.. py:data:: logger + + + +.. py:class:: DbtBaseOperator(project_dir: str, conn_id: str, base_cmd: str | list[str] = None, select: str = None, exclude: str = None, selector: str = None, vars: dict = None, models: str = None, cache_selected_only: bool = False, no_version_check: bool = False, fail_fast: bool = False, quiet: bool = False, warn_error: bool = False, db_name: str = None, schema: str = None, env: dict = None, append_env: bool = False, output_encoding: str = 'utf-8', skip_exit_code: int = 99, cancel_query_on_kill: bool = True, dbt_executable_path: str = 'dbt', dbt_cmd_flags: Dict[str, Any] = {}, **kwargs) + + Bases: :py:obj:`airflow.models.baseoperator.BaseOperator` + + Executes a dbt core cli command. + + :param project_dir: Which directory to look in for the dbt_project.yml file. Default is the current working + directory and its parents. + :param conn_id: The airflow connection to use as the target + :param base_cmd: dbt sub-command to run (i.e ls, seed, run, test, etc.) + :param select: dbt optional argument that specifies which nodes to include. + :param exclude: dbt optional argument that specifies which models to exclude. + :param selector: dbt optional argument - the selector name to use, as defined in selectors.yml + :param vars: dbt optional argument - Supply variables to the project. This argument overrides variables + defined in your dbt_project.yml file. This argument should be a YAML + string, eg. '{my_variable: my_value}' (templated) + :param models: dbt optional argument that specifies which nodes to include. + :param cache_selected_only: + :param no_version_check: dbt optional argument - If set, skip ensuring dbt's version matches the one specified in + the dbt_project.yml file ('require-dbt-version') + :param fail_fast: dbt optional argument to make dbt exit immediately if a single resource fails to build. 
+ :param quiet: dbt optional argument to show only error logs in stdout + :param warn_error: dbt optional argument to convert dbt warnings into errors + :param db_name: override the target db instead of the one supplied in the airflow connection + :param schema: override the target schema instead of the one supplied in the airflow connection + :param env: If env is not None, it must be a dict that defines the + environment variables for the new process; these are used instead + of inheriting the current process environment, which is the default + behavior. (templated) + :param append_env: If False(default) uses the environment variables passed in env params + and does not inherit the current process environment. If True, inherits the environment variables + from current passes and then environment variable passed by the user will either update the existing + inherited environment variables or the new variables gets appended to it + :param output_encoding: Output encoding of bash command + :param skip_exit_code: If task exits with this exit code, leave the task + in ``skipped`` state (default: 99). If set to ``None``, any non-zero + exit code will be treated as a failure. + :param cancel_query_on_kill: If true, then cancel any running queries when the task's on_kill() is executed. + Otherwise, the query will keep running when the task is killed. + :param dbt_executable_path: Path to dbt executable can be used with venv + (i.e. /home/astro/.pyenv/versions/dbt_venv/bin/dbt) + :param dbt_cmd_flags: Flags passed to dbt command override those that are calculated. + + .. py:attribute:: template_fields + :type: Sequence[str] + :value: ('env', 'vars') + + + + .. py:attribute:: global_flags + :value: ('project_dir', 'select', 'exclude', 'selector', 'vars', 'models', 'profiles_dir', 'profile') + + + + .. py:attribute:: global_boolean_flags + :value: ('no_version_check', 'cache_selected_only', 'fail_fast', 'quiet', 'warn_error') + + + + .. 
py:attribute:: intercept_flag + :value: True + + + + .. py:method:: get_env(context: airflow.utils.context.Context, profile_vars: dict[str, str]) -> dict[str, str] + + Builds the set of environment variables to be exposed for the bash command. + The order of determination is: + 1. Environment variables created for dbt profiles, `profile_vars`. + 2. The Airflow context as environment variables. + 3. System environment variables if dbt_args{"append_env": True} + 4. User specified environment variables, through dbt_args{"vars": {"key": "val"}} + If a user accidentally uses a key that is found earlier in the determination order then it is overwritten. + + + .. py:method:: add_global_flags() -> list[str] + + + .. py:method:: build_cmd(context: airflow.utils.context.Context, cmd_flags: list[str] | None = None, handle_profile: bool = True) -> Tuple[list[str], dict] diff --git a/docs/autoapi/cosmos/providers/dbt/core/operators/docker/index.rst b/docs/autoapi/cosmos/providers/dbt/core/operators/docker/index.rst new file mode 100644 index 000000000..497711c8a --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/operators/docker/index.rst @@ -0,0 +1,201 @@ +:py:mod:`cosmos.providers.dbt.core.operators.docker` +==================================================== + +.. py:module:: cosmos.providers.dbt.core.operators.docker + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.operators.docker.DbtDockerBaseOperator + cosmos.providers.dbt.core.operators.docker.DbtLSDockerOperator + cosmos.providers.dbt.core.operators.docker.DbtSeedDockerOperator + cosmos.providers.dbt.core.operators.docker.DbtRunDockerOperator + cosmos.providers.dbt.core.operators.docker.DbtTestDockerOperator + cosmos.providers.dbt.core.operators.docker.DbtRunOperationDockerOperator + cosmos.providers.dbt.core.operators.docker.DbtDepsDockerOperator + + + + +Attributes +~~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.operators.docker.logger + + +.. py:data:: logger + + + +.. py:class:: DbtDockerBaseOperator(image: str, **kwargs) + + Bases: :py:obj:`airflow.providers.docker.operators.docker.DockerOperator`, :py:obj:`cosmos.providers.dbt.core.operators.base.DbtBaseOperator` + + Executes a dbt core cli command in a Docker container. + + + .. py:attribute:: template_fields + :type: Sequence[str] + + + + .. py:attribute:: intercept_flag + :value: False + + + + .. py:method:: build_and_run_cmd(context: airflow.utils.context.Context, cmd_flags: list[str] | None = None) + + + .. py:method:: build_command(cmd_flags, context) + + + +.. py:class:: DbtLSDockerOperator(**kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core ls command. + + .. py:attribute:: ui_color + :value: '#DBCDF6' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtSeedDockerOperator(full_refresh: bool = False, **kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core seed command. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + .. py:attribute:: ui_color + :value: '#F58D7E' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtRunDockerOperator(**kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core run command. + + .. py:attribute:: ui_color + :value: '#7352BA' + + + + .. py:attribute:: ui_fgcolor + :value: '#F4F2FC' + + + + .. 
py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtTestDockerOperator(**kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core test command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtRunOperationDockerOperator(macro_name: str, args: dict = None, **kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core run-operation command. + + :param macro_name: name of macro to execute + :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the + selected macro. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:attribute:: template_fields + :type: Sequence[str] + :value: 'args' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtDepsDockerOperator(**kwargs) + + Bases: :py:obj:`DbtDockerBaseOperator` + + Executes a dbt core deps command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. 
diff --git a/docs/autoapi/cosmos/providers/dbt/core/operators/index.rst b/docs/autoapi/cosmos/providers/dbt/core/operators/index.rst new file mode 100644 index 000000000..2d4d1cec6 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/operators/index.rst @@ -0,0 +1,16 @@ +:py:mod:`cosmos.providers.dbt.core.operators` +============================================= + +.. py:module:: cosmos.providers.dbt.core.operators + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + base/index.rst + docker/index.rst + kubernetes/index.rst + local/index.rst diff --git a/docs/autoapi/cosmos/providers/dbt/core/operators/kubernetes/index.rst b/docs/autoapi/cosmos/providers/dbt/core/operators/kubernetes/index.rst new file mode 100644 index 000000000..e84093cf7 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/operators/kubernetes/index.rst @@ -0,0 +1,186 @@ +:py:mod:`cosmos.providers.dbt.core.operators.kubernetes` +======================================================== + +.. py:module:: cosmos.providers.dbt.core.operators.kubernetes + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.operators.kubernetes.DbtKubernetesBaseOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtLSKubernetesOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtSeedKubernetesOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtRunKubernetesOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtTestKubernetesOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtRunOperationKubernetesOperator + cosmos.providers.dbt.core.operators.kubernetes.DbtDepsKubernetesOperator + + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.operators.kubernetes.logger + + +.. py:data:: logger + + + +.. 
py:class:: DbtKubernetesBaseOperator(**kwargs) + + Bases: :py:obj:`airflow.providers.cncf.kubernetes.operators.kubernetes_pod.KubernetesPodOperator`, :py:obj:`cosmos.providers.dbt.core.operators.base.DbtBaseOperator` + + Executes a dbt core cli command in a Kubernetes Pod. + + + .. py:attribute:: template_fields + :type: Sequence[str] + + + + .. py:attribute:: intercept_flag + :value: False + + + + .. py:method:: build_env_args(env: dict) -> list[kubernetes.client.models.V1EnvVar] + + + .. py:method:: build_and_run_cmd(context: airflow.utils.context.Context, cmd_flags: list[str] | None = None) + + + .. py:method:: build_kube_args(cmd_flags, context) + + + +.. py:class:: DbtLSKubernetesOperator(**kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core ls command. + + .. py:attribute:: ui_color + :value: '#DBCDF6' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously + + + +.. py:class:: DbtSeedKubernetesOperator(full_refresh: bool = False, **kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core seed command. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + .. py:attribute:: ui_color + :value: '#F58D7E' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously + + + +.. py:class:: DbtRunKubernetesOperator(**kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core run command. + + .. py:attribute:: ui_color + :value: '#7352BA' + + + + .. py:attribute:: ui_fgcolor + :value: '#F4F2FC' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously + + + +.. 
py:class:: DbtTestKubernetesOperator(**kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core test command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously + + + +.. py:class:: DbtRunOperationKubernetesOperator(macro_name: str, args: dict = None, **kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core run-operation command. + + :param macro_name: name of macro to execute + :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the + selected macro. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:attribute:: template_fields + :type: Sequence[str] + :value: 'args' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously + + + +.. py:class:: DbtDepsKubernetesOperator(**kwargs) + + Bases: :py:obj:`DbtKubernetesBaseOperator` + + Executes a dbt core deps command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + Based on the deferrable parameter runs the pod asynchronously or synchronously diff --git a/docs/autoapi/cosmos/providers/dbt/core/operators/local/index.rst b/docs/autoapi/cosmos/providers/dbt/core/operators/local/index.rst new file mode 100644 index 000000000..527d6a5e0 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/operators/local/index.rst @@ -0,0 +1,220 @@ +:py:mod:`cosmos.providers.dbt.core.operators.local` +=================================================== + +.. py:module:: cosmos.providers.dbt.core.operators.local + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.operators.local.DbtLocalBaseOperator + cosmos.providers.dbt.core.operators.local.DbtLSLocalOperator + cosmos.providers.dbt.core.operators.local.DbtSeedLocalOperator + cosmos.providers.dbt.core.operators.local.DbtRunLocalOperator + cosmos.providers.dbt.core.operators.local.DbtTestLocalOperator + cosmos.providers.dbt.core.operators.local.DbtRunOperationLocalOperator + cosmos.providers.dbt.core.operators.local.DbtDepsLocalOperator + + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.operators.local.logger + + +.. py:data:: logger + + + +.. py:class:: DbtLocalBaseOperator(**kwargs) + + Bases: :py:obj:`cosmos.providers.dbt.core.operators.base.DbtBaseOperator` + + Executes a dbt core cli command locally. + + + .. py:attribute:: template_fields + :type: Sequence[str] + + + + .. py:method:: subprocess_hook() + + Returns hook for running the bash command. + + + .. py:method:: exception_handling(result: airflow.hooks.subprocess.SubprocessResult) + + + .. py:method:: run_command(cmd: list[str], env: dict[str, str]) -> airflow.hooks.subprocess.SubprocessResult + + + .. py:method:: build_and_run_cmd(context: airflow.utils.context.Context, cmd_flags: list[str] | None = None) -> airflow.hooks.subprocess.SubprocessResult + + + .. py:method:: execute(context: airflow.utils.context.Context) -> str + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + .. py:method:: on_kill() -> None + + Override this method to clean up subprocesses when a task instance + gets killed. Any use of the threading, subprocess or multiprocessing + module within an operator needs to be cleaned up, or it will leave + ghost processes behind. + + + +.. py:class:: DbtLSLocalOperator(**kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core ls command. + + .. 
py:attribute:: ui_color + :value: '#DBCDF6' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtSeedLocalOperator(full_refresh: bool = False, **kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core seed command. + + :param full_refresh: dbt optional arg - dbt will treat incremental models as table models + + .. py:attribute:: ui_color + :value: '#F58D7E' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtRunLocalOperator(**kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core run command. + + .. py:attribute:: ui_color + :value: '#7352BA' + + + + .. py:attribute:: ui_fgcolor + :value: '#F4F2FC' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtTestLocalOperator(**kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core test command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. 
py:class:: DbtRunOperationLocalOperator(macro_name: str, args: dict = None, **kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core run-operation command. + + :param macro_name: name of macro to execute + :param args: Supply arguments to the macro. This dictionary will be mapped to the keyword arguments defined in the + selected macro. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:attribute:: template_fields + :type: Sequence[str] + :value: 'args' + + + + .. py:method:: add_cmd_flags() + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. + + + +.. py:class:: DbtDepsLocalOperator(**kwargs) + + Bases: :py:obj:`DbtLocalBaseOperator` + + Executes a dbt core deps command. + + .. py:attribute:: ui_color + :value: '#8194E0' + + + + .. py:method:: execute(context: airflow.utils.context.Context) + + This is the main method to derive when creating an operator. + Context is the same dictionary used as when rendering jinja templates. + + Refer to get_template_context for more context. diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/bigquery/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/bigquery/index.rst new file mode 100644 index 000000000..b9a3ab889 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/bigquery/index.rst @@ -0,0 +1,35 @@ +:py:mod:`cosmos.providers.dbt.core.profiles.bigquery` +===================================================== + +.. py:module:: cosmos.providers.dbt.core.profiles.bigquery + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.bigquery.create_profile_vars_google_cloud_platform + + + +Attributes +~~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.profiles.bigquery.bigquery_profile + + +.. py:data:: bigquery_profile + + + +.. py:function:: create_profile_vars_google_cloud_platform(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/bigquery-setup + https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/databricks/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/databricks/index.rst new file mode 100644 index 000000000..b15695924 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/databricks/index.rst @@ -0,0 +1,44 @@ +:py:mod:`cosmos.providers.dbt.core.profiles.databricks` +======================================================= + +.. py:module:: cosmos.providers.dbt.core.profiles.databricks + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.databricks.create_profile_vars_databricks + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.databricks.logger + cosmos.providers.dbt.core.profiles.databricks.databricks_profile + + +.. py:data:: logger + + + +.. py:data:: databricks_profile + + + +.. py:function:: create_profile_vars_databricks(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/databricks-setup + https://airflow.apache.org/docs/apache-airflow-providers-databricks/stable/connections/databricks.html + + Database override is used to reference a Unity Catalog which was made available in dbt-databricks>=1.1.1 + Airflow recommends specifying token in the password field as it's more secure. + If the host contains the https then we remove it. 
diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/index.rst new file mode 100644 index 000000000..b5d7a1595 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/index.rst @@ -0,0 +1,130 @@ +:py:mod:`cosmos.providers.dbt.core.profiles` +============================================ + +.. py:module:: cosmos.providers.dbt.core.profiles + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + bigquery/index.rst + databricks/index.rst + postgres/index.rst + redshift/index.rst + snowflake/index.rst + + +Package Contents +---------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.AdapterConfig + + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.create_profile_vars_google_cloud_platform + cosmos.providers.dbt.core.profiles.create_profile_vars_databricks + cosmos.providers.dbt.core.profiles.create_profile_vars_postgres + cosmos.providers.dbt.core.profiles.create_profile_vars_redshift + cosmos.providers.dbt.core.profiles.create_profile_vars_snowflake + cosmos.providers.dbt.core.profiles.get_available_adapters + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.bigquery_profile + cosmos.providers.dbt.core.profiles.databricks_profile + cosmos.providers.dbt.core.profiles.postgres_profile + cosmos.providers.dbt.core.profiles.redshift_profile + cosmos.providers.dbt.core.profiles.snowflake_profile + + +.. py:data:: bigquery_profile + + + +.. py:function:: create_profile_vars_google_cloud_platform(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/bigquery-setup + https://airflow.apache.org/docs/apache-airflow-providers-google/stable/connections/gcp.html + + +.. 
py:function:: create_profile_vars_databricks(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/databricks-setup + https://airflow.apache.org/docs/apache-airflow-providers-databricks/stable/connections/databricks.html + + Database override is used to reference a Unity Catalog which was made available in dbt-databricks>=1.1.1 + Airflow recommends specifying token in the password field as it's more secure. + If the host contains the https then we remove it. + + +.. py:data:: databricks_profile + + + +.. py:function:: create_profile_vars_postgres(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/postgres-setup + https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/connections/postgres.html + + +.. py:data:: postgres_profile + + + +.. py:function:: create_profile_vars_redshift(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/redshift-setup + https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/redshift.html + + +.. py:data:: redshift_profile + + + +.. py:function:: create_profile_vars_snowflake(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/snowflake-setup + https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html + + +.. py:data:: snowflake_profile + + + +.. py:class:: AdapterConfig + + .. py:attribute:: profile_name + :type: str + + + + .. py:attribute:: profile + :type: Dict[str, str] + + + + .. 
py:attribute:: create_profile_function + :type: Callable[[airflow.models.Connection, Optional[str], Optional[str]], Tuple[str, Dict[str, str]]] + + + + +.. py:function:: get_available_adapters() -> Dict[str, AdapterConfig] diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/postgres/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/postgres/index.rst new file mode 100644 index 000000000..79b6a8018 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/postgres/index.rst @@ -0,0 +1,35 @@ +:py:mod:`cosmos.providers.dbt.core.profiles.postgres` +===================================================== + +.. py:module:: cosmos.providers.dbt.core.profiles.postgres + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.postgres.create_profile_vars_postgres + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.postgres.postgres_profile + + +.. py:data:: postgres_profile + + + +.. py:function:: create_profile_vars_postgres(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/postgres-setup + https://airflow.apache.org/docs/apache-airflow-providers-postgres/stable/connections/postgres.html diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/redshift/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/redshift/index.rst new file mode 100644 index 000000000..9d7b0e48f --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/redshift/index.rst @@ -0,0 +1,35 @@ +:py:mod:`cosmos.providers.dbt.core.profiles.redshift` +===================================================== + +.. py:module:: cosmos.providers.dbt.core.profiles.redshift + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.profiles.redshift.create_profile_vars_redshift + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.redshift.redshift_profile + + +.. py:data:: redshift_profile + + + +.. py:function:: create_profile_vars_redshift(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/redshift-setup + https://airflow.apache.org/docs/apache-airflow-providers-amazon/stable/connections/redshift.html diff --git a/docs/autoapi/cosmos/providers/dbt/core/profiles/snowflake/index.rst b/docs/autoapi/cosmos/providers/dbt/core/profiles/snowflake/index.rst new file mode 100644 index 000000000..f303d1c78 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/profiles/snowflake/index.rst @@ -0,0 +1,39 @@ +:py:mod:`cosmos.providers.dbt.core.profiles.snowflake` +====================================================== + +.. py:module:: cosmos.providers.dbt.core.profiles.snowflake + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.snowflake.get_snowflake_account + cosmos.providers.dbt.core.profiles.snowflake.create_profile_vars_snowflake + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.profiles.snowflake.snowflake_profile + + +.. py:data:: snowflake_profile + + + +.. py:function:: get_snowflake_account(account: str, region: str | None = None) -> str + + +.. 
py:function:: create_profile_vars_snowflake(conn: airflow.models.Connection, database_override: str | None = None, schema_override: str | None = None) -> tuple[str, dict[str, str]] + + https://docs.getdbt.com/reference/warehouse-setups/snowflake-setup + https://airflow.apache.org/docs/apache-airflow-providers-snowflake/stable/connections/snowflake.html diff --git a/docs/autoapi/cosmos/providers/dbt/core/utils/data_aware_scheduling/index.rst b/docs/autoapi/cosmos/providers/dbt/core/utils/data_aware_scheduling/index.rst new file mode 100644 index 000000000..72cb796af --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/utils/data_aware_scheduling/index.rst @@ -0,0 +1,20 @@ +:py:mod:`cosmos.providers.dbt.core.utils.data_aware_scheduling` +=============================================================== + +.. py:module:: cosmos.providers.dbt.core.utils.data_aware_scheduling + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.utils.data_aware_scheduling.get_dbt_dataset + + + +.. py:function:: get_dbt_dataset(connection_id: str, project_name: str, model_name: str) diff --git a/docs/autoapi/cosmos/providers/dbt/core/utils/file_syncing/index.rst b/docs/autoapi/cosmos/providers/dbt/core/utils/file_syncing/index.rst new file mode 100644 index 000000000..ed3c665b9 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/utils/file_syncing/index.rst @@ -0,0 +1,40 @@ +:py:mod:`cosmos.providers.dbt.core.utils.file_syncing` +====================================================== + +.. py:module:: cosmos.providers.dbt.core.utils.file_syncing + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.utils.file_syncing.exclude + cosmos.providers.dbt.core.utils.file_syncing.has_differences + cosmos.providers.dbt.core.utils.file_syncing.is_file_locked + + + +Attributes +~~~~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.core.utils.file_syncing.logger + + +.. py:data:: logger + + + +.. py:function:: exclude(src_dir, contents) + + +.. py:function:: has_differences(dcmp) + + +.. py:function:: is_file_locked(file_path) diff --git a/docs/autoapi/cosmos/providers/dbt/core/utils/index.rst b/docs/autoapi/cosmos/providers/dbt/core/utils/index.rst new file mode 100644 index 000000000..43d525a9b --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/utils/index.rst @@ -0,0 +1,15 @@ +:py:mod:`cosmos.providers.dbt.core.utils` +========================================= + +.. py:module:: cosmos.providers.dbt.core.utils + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + data_aware_scheduling/index.rst + file_syncing/index.rst + profiles_generator/index.rst diff --git a/docs/autoapi/cosmos/providers/dbt/core/utils/profiles_generator/index.rst b/docs/autoapi/cosmos/providers/dbt/core/utils/profiles_generator/index.rst new file mode 100644 index 000000000..2763ded3a --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/core/utils/profiles_generator/index.rst @@ -0,0 +1,44 @@ +:py:mod:`cosmos.providers.dbt.core.utils.profiles_generator` +============================================================ + +.. py:module:: cosmos.providers.dbt.core.utils.profiles_generator + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.utils.profiles_generator.create_default_profiles + cosmos.providers.dbt.core.utils.profiles_generator.map_profile + cosmos.providers.dbt.core.utils.profiles_generator.conn_exists + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.core.utils.profiles_generator.logger + + +.. py:data:: logger + + + +.. py:function:: create_default_profiles(profile_path: pathlib.Path) -> None + + Write all the available profiles out to the profile path. + :param profile_path: The path location to write all the profiles to. 
+ :return: Nothing + + +.. py:function:: map_profile(conn_id: str, db_override: Optional[str] = None, schema_override: Optional[str] = None) -> Tuple[str, dict] + + +.. py:function:: conn_exists(conn_id: str) -> bool diff --git a/docs/autoapi/cosmos/providers/dbt/dag/index.rst b/docs/autoapi/cosmos/providers/dbt/dag/index.rst new file mode 100644 index 000000000..c07c3ce3c --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/dag/index.rst @@ -0,0 +1,44 @@ +:py:mod:`cosmos.providers.dbt.dag` +================================== + +.. py:module:: cosmos.providers.dbt.dag + +.. autoapi-nested-parse:: + + This module contains a function to render a dbt project as an Airflow DAG. + + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.dag.DbtDag + + + + +.. py:class:: DbtDag(dbt_project_name: str, conn_id: str, dbt_args: Dict[str, Any] = {}, emit_datasets: bool = True, dbt_root_path: str = '/usr/local/airflow/dbt', dbt_models_dir: str = 'models', test_behavior: Literal[none, after_each, after_all] = 'after_each', select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal[local, docker, kubernetes] = 'local', *args: Any, **kwargs: Any) + + Bases: :py:obj:`cosmos.core.airflow.CosmosDag` + + Render a dbt project as an Airflow DAG. Overrides the Airflow DAG model to allow + for additional configs to be passed. + + :param dbt_project_name: The name of the dbt project + :param dbt_root_path: The path to the dbt root directory + :param dbt_models_dir: The path to the dbt models directory within the project + :param conn_id: The Airflow connection ID to use for the dbt profile + :param dbt_args: Parameters to pass to the underlying dbt operators, can include dbt_executable_path to utilize venv + :param emit_datasets: If enabled test nodes emit Airflow Datasets for downstream cross-DAG dependencies + :param test_behavior: The behavior for running tests. 
Options are "none", "after_each", and "after_all". + Defaults to "after_each" + :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". + Defaults to "local" diff --git a/docs/autoapi/cosmos/providers/dbt/index.rst b/docs/autoapi/cosmos/providers/dbt/index.rst new file mode 100644 index 000000000..f804543ae --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/index.rst @@ -0,0 +1,31 @@ +:py:mod:`cosmos.providers.dbt` +============================== + +.. py:module:: cosmos.providers.dbt + +.. autoapi-nested-parse:: + + dbt support for Airflow. Contains dags, task groups, and operators. + + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + core/index.rst + parser/index.rst + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + constants/index.rst + dag/index.rst + render/index.rst + task_group/index.rst diff --git a/docs/autoapi/cosmos/providers/dbt/parser/index.rst b/docs/autoapi/cosmos/providers/dbt/parser/index.rst new file mode 100644 index 000000000..cfdb4445c --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/parser/index.rst @@ -0,0 +1,13 @@ +:py:mod:`cosmos.providers.dbt.parser` +===================================== + +.. py:module:: cosmos.providers.dbt.parser + + +Submodules +---------- +.. toctree:: + :titlesonly: + :maxdepth: 1 + + project/index.rst diff --git a/docs/autoapi/cosmos/providers/dbt/parser/project/index.rst b/docs/autoapi/cosmos/providers/dbt/parser/project/index.rst new file mode 100644 index 000000000..a1af4f4d3 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/parser/project/index.rst @@ -0,0 +1,153 @@ +:py:mod:`cosmos.providers.dbt.parser.project` +============================================= + +.. 
py:module:: cosmos.providers.dbt.parser.project + +.. autoapi-nested-parse:: + + Used to parse and extract information from dbt projects. + + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.parser.project.DbtModelConfig + cosmos.providers.dbt.parser.project.DbtModel + cosmos.providers.dbt.parser.project.DbtProject + + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.parser.project.logger + + +.. py:data:: logger + + + +.. py:class:: DbtModelConfig + + Represents a single model config. + + .. py:attribute:: config_types + :type: ClassVar[List[str]] + :value: ['materialized', 'schema', 'tags'] + + + + .. py:attribute:: config_selectors + :type: Set[str] + + + + .. py:attribute:: upstream_models + :type: Set[str] + + + + .. py:method:: __add__(other_config: DbtModelConfig) -> DbtModelConfig + + Add one config to another. Necessary because configs can come from different places + + + .. py:method:: _config_selector_ooo(sql_configs: Set[str], properties_configs: Set[str], prefixes: List[str] = None) -> Set[str] + + this will force values from the sql files to override whatever is in the properties.yml. So ooo: + # 1. model sql files + # 2. properties.yml files + + + +.. py:class:: DbtModel + + Represents a single dbt model. + + .. py:attribute:: name + :type: str + + + + .. py:attribute:: path + :type: pathlib.Path + + + + .. py:attribute:: config + :type: DbtModelConfig + + + + .. py:method:: __post_init__() -> None + + Parses the file and extracts metadata (dependencies, tags, etc) + + + .. py:method:: _extract_config(kwarg, config_name: str) + + + .. py:method:: __repr__() -> str + + Returns the string representation of the model. + + + +.. py:class:: DbtProject + + Represents a single dbt project. + + .. py:attribute:: project_name + :type: str + + + + .. py:attribute:: dbt_root_path + :type: str + :value: '/usr/local/airflow/dbt' + + + + .. 
py:attribute:: dbt_models_dir + :type: str + :value: 'models' + + + + .. py:attribute:: models + :type: Dict[str, DbtModel] + + + + .. py:attribute:: project_dir + :type: pathlib.Path + + + + .. py:attribute:: models_dir + :type: pathlib.Path + + + + .. py:method:: __post_init__() -> None + + Initializes the parser. + + + .. py:method:: _handle_sql_file(path: pathlib.Path) -> None + + Handles a single sql file. + + + .. py:method:: _handle_config_file(path: pathlib.Path) -> None + + Handles a single config file. diff --git a/docs/autoapi/cosmos/providers/dbt/render/index.rst b/docs/autoapi/cosmos/providers/dbt/render/index.rst new file mode 100644 index 000000000..6cf8833a8 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/render/index.rst @@ -0,0 +1,56 @@ +:py:mod:`cosmos.providers.dbt.render` +===================================== + +.. py:module:: cosmos.providers.dbt.render + +.. autoapi-nested-parse:: + + This module contains a function to render a dbt project into Cosmos entities. + + + +Module Contents +--------------- + + +Functions +~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.render.calculate_operator_class + cosmos.providers.dbt.render.render_project + + + +Attributes +~~~~~~~~~~ + +.. autoapisummary:: + + cosmos.providers.dbt.render.logger + + +.. py:data:: logger + + + +.. py:function:: calculate_operator_class(execution_mode: str, dbt_class: str) -> str + + +.. 
py:function:: render_project(dbt_project_name: str, dbt_root_path: str = '/usr/local/airflow/dbt', dbt_models_dir: str = 'models', task_args: Dict[str, Any] = {}, test_behavior: Literal[none, after_each, after_all] = 'after_each', emit_datasets: bool = True, conn_id: str = 'default_conn_id', select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal[local, docker, kubernetes] = 'local') -> cosmos.core.graph.entities.Group + + Turn a dbt project into a Group + + :param dbt_project_name: The name of the dbt project + :param dbt_root_path: The root path to your dbt folder. Defaults to /usr/local/airflow/dbt + :param task_args: Arguments to pass to the underlying dbt operators + :param test_behavior: The behavior for running tests. Options are "none", "after_each", and "after_all". + Defaults to "after_each" + :param emit_datasets: If enabled test nodes emit Airflow Datasets for downstream cross-DAG dependencies + :param conn_id: The Airflow connection ID to use in Airflow Datasets + :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". + Defaults to "local" diff --git a/docs/autoapi/cosmos/providers/dbt/task_group/index.rst b/docs/autoapi/cosmos/providers/dbt/task_group/index.rst new file mode 100644 index 000000000..6bae121a1 --- /dev/null +++ b/docs/autoapi/cosmos/providers/dbt/task_group/index.rst @@ -0,0 +1,44 @@ +:py:mod:`cosmos.providers.dbt.task_group` +========================================= + +.. py:module:: cosmos.providers.dbt.task_group + +.. autoapi-nested-parse:: + + This module contains a function to render a dbt project as an Airflow Task Group. + + + +Module Contents +--------------- + +Classes +~~~~~~~ + +.. 
autoapisummary:: + + cosmos.providers.dbt.task_group.DbtTaskGroup + + + + +.. py:class:: DbtTaskGroup(dbt_project_name: str, conn_id: str, dbt_args: Dict[str, Any] = {}, emit_datasets: bool = True, dbt_root_path: str = '/usr/local/airflow/dbt', dbt_models_dir: str = 'models', test_behavior: Literal[none, after_each, after_all] = 'after_each', select: Dict[str, List[str]] = {}, exclude: Dict[str, List[str]] = {}, execution_mode: Literal[local, docker, kubernetes] = 'local', *args: Any, **kwargs: Any) + + Bases: :py:obj:`cosmos.core.airflow.CosmosTaskGroup` + + Render a dbt project as an Airflow Task Group. Overrides the Airflow Task Group model to allow + for additional configs to be passed. + + :param dbt_project_name: The name of the dbt project + :param dbt_root_path: The path to the dbt root directory + :param dbt_models_dir: The path to the dbt models directory within the project + :param conn_id: The Airflow connection ID to use for the dbt profile + :param dbt_args: Parameters to pass to the underlying dbt operators, can include dbt_executable_path to utilize venv + :param emit_datasets: If enabled test nodes emit Airflow Datasets for downstream cross-DAG dependencies + :param test_behavior: The behavior for running tests. Options are "none", "after_each", and "after_all". + Defaults to "after_each" + :param select: A dict of dbt selector arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param exclude: A dict of dbt exclude arguments (i.e., {"tags": ["tag_1", "tag_2"]}) + :param execution_mode: The execution mode in which the dbt project should be run. + Options are "local", "docker", and "kubernetes". + Defaults to "local" diff --git a/docs/autoapi/cosmos/providers/index.rst b/docs/autoapi/cosmos/providers/index.rst new file mode 100644 index 000000000..18cb3fb91 --- /dev/null +++ b/docs/autoapi/cosmos/providers/index.rst @@ -0,0 +1,13 @@ +:py:mod:`cosmos.providers` +========================== + +.. 
py:module:: cosmos.providers + + +Subpackages +----------- +.. toctree:: + :titlesonly: + :maxdepth: 3 + + dbt/index.rst diff --git a/docs/autoapi/index.rst b/docs/autoapi/index.rst new file mode 100644 index 000000000..ca6768652 --- /dev/null +++ b/docs/autoapi/index.rst @@ -0,0 +1,11 @@ +API Reference +============= + +This page contains auto-generated API reference documentation [#f1]_. + +.. toctree:: + :titlesonly: + + /autoapi/cosmos/index + +.. [#f1] Created with `sphinx-autoapi `_ diff --git a/docs/conf.py b/docs/conf.py index 4df234269..e1ae17073 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -20,14 +20,13 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - "sphinx.ext.autodoc", - "sphinx.ext.autosummary", - "sphinx.ext.autosectionlabel", + "autoapi.extension", "sphinx_tabs.tabs", ] -autosummary_generate = True add_module_names = False +autoapi_dirs = ["../cosmos"] +autoapi_generate_api_docs = False autodoc_mock_imports = ["airflow"] templates_path = ["_templates"] diff --git a/docs/index.rst b/docs/index.rst index a533ba529..631fb8c30 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,7 @@ Cosmos Home dbt - API Reference + API Reference Contributing .. note:: diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst deleted file mode 100644 index 733cf6543..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtDag.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtDag -=========================== - -.. currentmodule:: cosmos.providers.dbt - -.. 
autodata:: DbtDag diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst deleted file mode 100644 index fdadb8077..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtLSDockerOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtLSDockerOperator -======================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtLSDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst deleted file mode 100644 index 412f003f1..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtLSKubernetesOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtLSKubernetesOperator -============================================ - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtLSKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst deleted file mode 100644 index 1de25e24a..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtLSLocalOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtLSLocalOperator -======================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtLSLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst deleted file mode 100644 index 038b05417..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunDockerOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunDockerOperator -========================================= - -.. currentmodule:: cosmos.providers.dbt - -.. 
autodata:: DbtRunDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst deleted file mode 100644 index 6593584d9..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunKubernetesOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunKubernetesOperator -============================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtRunKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst deleted file mode 100644 index b47d88644..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunLocalOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunLocalOperator -======================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtRunLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst deleted file mode 100644 index 558568f3a..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationDockerOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunOperationDockerOperator -================================================== - -.. currentmodule:: cosmos.providers.dbt - -.. 
autodata:: DbtRunOperationDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst deleted file mode 100644 index 470e60aae..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationKubernetesOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunOperationKubernetesOperator -====================================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtRunOperationKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst deleted file mode 100644 index b6fc16916..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtRunOperationLocalOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtRunOperationLocalOperator -================================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtRunOperationLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst deleted file mode 100644 index c9be1e047..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedDockerOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtSeedDockerOperator -========================================== - -.. currentmodule:: cosmos.providers.dbt - -.. 
autodata:: DbtSeedDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst deleted file mode 100644 index 375dcb712..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedKubernetesOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtSeedKubernetesOperator -============================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtSeedKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst deleted file mode 100644 index fec2fcae9..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtSeedLocalOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtSeedLocalOperator -========================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtSeedLocalOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst deleted file mode 100644 index 6f71220b5..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTaskGroup.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtTaskGroup -================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtTaskGroup diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst deleted file mode 100644 index f96dc087b..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTestDockerOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtTestDockerOperator -========================================== - -.. currentmodule:: cosmos.providers.dbt - -.. 
autodata:: DbtTestDockerOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst deleted file mode 100644 index bff212df5..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTestKubernetesOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtTestKubernetesOperator -============================================== - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtTestKubernetesOperator diff --git a/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst b/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst deleted file mode 100644 index e4acb310e..000000000 --- a/docs/reference/_generated/cosmos.providers.dbt.DbtTestLocalOperator.rst +++ /dev/null @@ -1,6 +0,0 @@ -cosmos.providers.dbt.DbtTestLocalOperator -========================================= - -.. currentmodule:: cosmos.providers.dbt - -.. autodata:: DbtTestLocalOperator diff --git a/docs/reference/dbt-reference.rst b/docs/reference/dbt-reference.rst deleted file mode 100644 index afeb973ed..000000000 --- a/docs/reference/dbt-reference.rst +++ /dev/null @@ -1,40 +0,0 @@ -dbt -===================== - -Cosmos provides a set of classes and operators to interact with dbt. - -.. currentmodule:: cosmos.providers.dbt - -DAGs and Task Groups ---------------------- - -.. autosummary:: - :toctree: _generated/ - :caption: DAGs and Task Groups - - DbtDag - DbtTaskGroup - - -Operators ---------------------- - -.. 
autosummary:: - :toctree: _generated/ - :caption: Operators - - DbtLSLocalOperator, - DbtRunOperationLocalOperator, - DbtRunLocalOperator, - DbtSeedLocalOperator, - DbtTestLocalOperator, - DbtLSDockerOperator, - DbtRunOperationDockerOperator, - DbtRunDockerOperator, - DbtSeedDockerOperator, - DbtTestDockerOperator, - DbtLSKubernetesOperator, - DbtRunOperationKubernetesOperator, - DbtRunKubernetesOperator, - DbtSeedKubernetesOperator, - DbtTestKubernetesOperator, diff --git a/docs/reference/index.rst b/docs/reference/index.rst deleted file mode 100644 index 1471e2daf..000000000 --- a/docs/reference/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -API Reference -============= - -.. toctree:: - :maxdepth: 5 - - dbt diff --git a/pyproject.toml b/pyproject.toml index 1fc3c8de0..18d139f83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,7 @@ docs =[ "sphinx", "pydata-sphinx-theme", "sphinx-autobuild", + "sphinx-autoapi", "sphinx-tabs" ] tests = [