[dagster-dbt] Upgrade dagster-dbt to be compatible with dbt 1.5.x (#1…

…3907) ## Summary & Motivation Fixes: #13898 The changes: - `--no-use-color` is deprecated (`--no-use-colors` is allowed on all versions 1.0+) - The dbt selection string parsing logic relies on global state being set (I know, I know...), and the way that this global state is set has been changed (previously, they were attributes directly on the module, now they're all wrapped into a single GLOBAL_FLAGS object) - Nasty hack to deal with dbt-core shifting from `argparse` to `click`. We were using the argument parsing functionality to parse dbt Cloud job commands, so I found a somewhat similar entrypoint to work with. - Ensure that the current working directory when executing dbt commands is the project directory. This handles the issue linked below, where dbt now writes output files to the cwd rather than the project directory. This is not an issue if the cwd *is* the project directory ;) - Update some tests to be a bit more flexible to subtly different output formats (generally this is just testing stuff in the DbtCliOutput object, which we don't particularly expect or need to be very stable) - Some errors got reworded, so tests needed to be updated In general, the `select_unique_ids_from_manifest` function access a lot of things it potentially shouldn't touch from the dbt internals, and there's no guarantee similar issues don't crop up in the future. The hope is that there is eventually a dbt-native python entrypoint for this functionality, at which point in time we can remove all this ugly stuff. ## How I Tested These Changes Added a dbt_15X tox env, which is pinned to the specific rcs that were available to me. Once the real `dbt 1.5.x` comes out, this should be unpinned --------- Co-authored-by: Rex Ledesma <rex@elementl.com>
dagster-io · Apr 27, 2023 · d9f9087 · d9f9087
1 parent e3e7654
commit d9f9087
Show file tree

Hide file tree

Showing 10 changed files with 65 additions and 24 deletions.
diff --git a/.buildkite/dagster-buildkite/dagster_buildkite/steps/packages.py b/.buildkite/dagster-buildkite/dagster_buildkite/steps/packages.py
@@ -421,6 +421,7 @@ def k8s_extra_cmds(version: str, _) -> List[str]:
  pytest_tox_factors=[
  "dbt_13X",
  "dbt_14X",
+ "dbt_15X",
  ],
  ),
  PackageSpec(

diff --git a/examples/assets_smoke_test/setup.py b/examples/assets_smoke_test/setup.py
@@ -9,8 +9,7 @@
  "dagster-pandas",
  "dagster-dbt",
  "pandas",
- "dbt-core",
- "dbt-snowflake",
+ "dbt-snowflake>=1.2",
  "dagster-snowflake",
  "dagster-snowflake-pandas",
  ],

diff --git a/examples/project_fully_featured/setup.py b/examples/project_fully_featured/setup.py
@@ -23,9 +23,8 @@
  "dagster-pyspark",
  "dagster-slack",
  "dagster-postgres",
- "dbt-core",
- "dbt-duckdb",
- "dbt-snowflake",
+ "dbt-duckdb>=1.2",
+ "dbt-snowflake>=1.2",
  "duckdb!=0.3.3, <= 6.0.0", # missing wheels
  "mock",
  # DataFrames were not written to Snowflake, causing errors

diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/asset_defs.py b/python_modules/libraries/dagster-dbt/dagster_dbt/asset_defs.py
@@ -199,7 +199,7 @@ def _events_for_structured_json_line(
  output_name=output_name_fn(compiled_node_info),
  metadata=metadata,
  )
- elif node_resource_type == "test" and runtime_node_info.get("node_finished_at") is not None:
+ elif node_resource_type == "test" and runtime_node_info.get("node_finished_at"):
  upstream_unique_ids = (
  manifest_json["nodes"][unique_id].get("depends_on", {}).get("nodes", [])
  )

diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/cli/utils.py b/python_modules/libraries/dagster-dbt/dagster_dbt/cli/utils.py
@@ -89,7 +89,7 @@ def _create_command_list(
  if warn_error:
  prefix += ["--warn-error"]
  if json_log_format:
- prefix += ["--no-use-color", "--log-format", "json"]
+ prefix += ["--no-use-colors", "--log-format", "json"]
  if debug:
  prefix += ["--debug"]
 
@@ -117,6 +117,7 @@ def _core_execute_cli(
  command_list: Sequence[str],
  ignore_handled_error: bool,
  json_log_format: bool,
+ project_dir: str,
 ) -> Iterator[Union[DbtCliEvent, int]]:
  """Runs a dbt command in a subprocess and yields parsed output line by line."""
  # Execute the dbt CLI command in a subprocess.
@@ -130,6 +131,7 @@ def _core_execute_cli(
  stdout=subprocess.PIPE,
  stderr=subprocess.STDOUT,
  env=passenv,
+ cwd=project_dir if os.path.exists(project_dir) else None,
  )
  for raw_line in process.stdout or []:
  line = raw_line.decode().strip()
@@ -180,6 +182,7 @@ def execute_cli_stream(
  command_list=command_list,
  json_log_format=json_log_format,
  ignore_handled_error=ignore_handled_error,
+ project_dir=flags_dict["project-dir"],
  ):
  if isinstance(event, int):
  return_code = event
@@ -227,6 +230,7 @@ def execute_cli(
  command_list=command_list,
  json_log_format=json_log_format,
  ignore_handled_error=ignore_handled_error,
+ project_dir=flags_dict["project-dir"],
  ):
  if isinstance(event, int):
  return_code = event

diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/cloud/asset_defs.py b/python_modules/libraries/dagster-dbt/dagster_dbt/cloud/asset_defs.py
@@ -36,7 +36,6 @@
 from dagster._core.definitions.metadata import MetadataUserInput
 from dagster._core.execution.context.init import build_init_resource_context
 from dagster._utils.backcompat import experimental_arg_warning
-from dbt.main import parse_args as dbt_parse_args
 
 from dagster_dbt.asset_utils import (
  default_asset_key_fn,
@@ -98,7 +97,21 @@ def build_definitions(
 
  @staticmethod
  def parse_dbt_command(dbt_command: str) -> Namespace:
- return dbt_parse_args(args=shlex.split(dbt_command)[1:])
+ args = shlex.split(dbt_command)[1:]
+ try:
+ from dbt.cli.flags import ( # pyright: ignore [reportMissingImports]
+ Flags,
+ args_to_context,
+ )
+
+ # nasty hack to get dbt to parse the args
+ # dbt >= 1.5.0 requires that profiles-dir is set to an existing directory
+ return Namespace(**vars(Flags(args_to_context(args + ["--profiles-dir", "."]))))
+ except ImportError:
+ # dbt < 1.5.0 compat
+ from dbt.main import parse_args
+
+ return parse_args(args=args)
 
  @staticmethod
  def get_job_materialization_command_step(execute_steps: List[str]) -> int:
@@ -127,8 +140,11 @@ def get_compile_filters(parsed_args: Namespace) -> List[str]:
  if excluded_models:
  dbt_compile_options.append(f"--exclude {' '.join(excluded_models)}")
 
- if parsed_args.selector_name:
- dbt_compile_options.append(f"--selector {parsed_args.selector_name}")
+ selector = getattr(parsed_args, "selector_name", None) or getattr(
+ parsed_args, "selector", None
+ )
+ if selector:
+ dbt_compile_options.append(f"--selector {selector}")
 
  return dbt_compile_options
 
@@ -174,8 +190,7 @@ def _compile_dbt_cloud_job(self, dbt_cloud_job: Mapping[str, Any]) -> Tuple[int,
  #
  # Since we're only doing this to generate the dependency structure, just use an arbitrary
  # partition key (e.g. the last one) to retrieve the partition variable.
- dbt_vars = json.loads(parsed_args.vars or "{}")
- if dbt_vars:
+ if parsed_args.vars and parsed_args.vars != "{}":
  raise DagsterDbtCloudJobInvariantViolationError(
  f"The dbt Cloud job '{dbt_cloud_job['name']}' ({dbt_cloud_job['id']}) must not have"
  " variables defined from `--vars` in its `dbt run` or `dbt build` command."

diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt/utils.py b/python_modules/libraries/dagster-dbt/dagster_dbt/utils.py
@@ -247,7 +247,6 @@ def select_unique_ids_from_manifest(
  manifest_json: Optional[Mapping[str, Any]] = None,
 ) -> AbstractSet[str]:
  """Method to apply a selection string to an existing manifest.json file."""
- import dbt.flags as flags
  import dbt.graph.cli as graph_cli
  import dbt.graph.selector as graph_selector
  from dbt.contracts.graph.manifest import Manifest, WritableManifest
@@ -300,7 +299,13 @@ def __getattr__(self, item):
  graph = graph_selector.Graph(DiGraph(incoming_graph_data=child_map))
 
  # create a parsed selection from the select string
- flags.INDIRECT_SELECTION = IndirectSelection.Eager
+ try:
+ from dbt.flags import GLOBAL_FLAGS
+ except ImportError:
+ # dbt < 1.5.0 compat
+ import dbt.flags as GLOBAL_FLAGS
+ setattr(GLOBAL_FLAGS, "INDIRECT_SELECTION", IndirectSelection.Eager)
+ setattr(GLOBAL_FLAGS, "WARN_ERROR", True)
  parsed_spec: SelectionSpec = graph_cli.parse_union([select], True)
 
  if exclude:

diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/cli/test_resources.py b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/cli/test_resources.py
@@ -50,7 +50,10 @@ def my_dbt_solid(context):
 
  context = get_dbt_solid_context(test_project_dir, dbt_config_dir)
  dbt_result = my_dbt_solid(context)
- assert len(dbt_result.raw_output.split("\n\n")) == 25
+ # slightly different output formats between dbt versions result in raw log output shifting
+ expected_results = 25
+ raw_lines = len(dbt_result.raw_output.split("\n\n"))
+ assert raw_lines == expected_results or raw_lines == expected_results + 2
 
 
 def test_ls_resource_type(test_project_dir, dbt_config_dir):
@@ -60,7 +63,10 @@ def my_dbt_solid(context):
 
  context = get_dbt_solid_context(test_project_dir, dbt_config_dir)
  dbt_result = my_dbt_solid(context)
- assert len(dbt_result.raw_output.split("\n\n")) == 6
+ # slightly different output formats between dbt versions result in raw log output shifting
+ expected_results = 6
+ raw_lines = len(dbt_result.raw_output.split("\n\n"))
+ assert raw_lines == expected_results or raw_lines == expected_results + 2
 
 
 def test_test(dbt_seed, test_project_dir, dbt_config_dir):
@@ -147,7 +153,10 @@ def my_dbt_solid(context):
  context = get_dbt_solid_context(test_project_dir, dbt_config_dir)
  dbt_result = my_dbt_solid(context)
  assert len(dbt_result.result["results"]) == 4
- assert json.loads(dbt_result.result["args"]["vars"]) == my_vars
+
+ # vars can be stored as a dict or a json-encoded string depending on dbt version
+ result_vars = dbt_result.result["args"]["vars"]
+ assert result_vars == my_vars or json.loads(result_vars) == my_vars
 
 
 def test_models_and_extra_run(dbt_seed, test_project_dir, dbt_config_dir):
@@ -161,7 +170,9 @@ def my_dbt_solid(context):
  dbt_result = my_dbt_solid(context)
 
  assert len(dbt_result.result["results"]) == 2
- assert json.loads(dbt_result.result["args"]["vars"]) == my_vars
+ # vars can be stored as a dict or a json-encoded string depending on dbt version
+ result_vars = dbt_result.result["args"]["vars"]
+ assert result_vars == my_vars or json.loads(result_vars) == my_vars
 
 
 def test_exclude_run(dbt_seed, test_project_dir, dbt_config_dir):
@@ -175,7 +186,9 @@ def my_dbt_solid(context):
  dbt_result = my_dbt_solid(context)
 
  assert len(dbt_result.result["results"]) == 3
- assert json.loads(dbt_result.result["args"]["vars"]) == my_vars
+ # vars can be stored as a dict or a json-encoded string depending on dbt version
+ result_vars = dbt_result.result["args"]["vars"]
+ assert result_vars == my_vars or json.loads(result_vars) == my_vars
 
 
 def test_merged_extra_flags_run(dbt_seed, test_project_dir, dbt_config_dir):
@@ -192,4 +205,6 @@ def my_dbt_solid(context):
  assert len(dbt_result.result["results"]) == 4
  for key in my_vars.keys():
  assert key in dbt_result.command
- assert json.loads(dbt_result.result["args"]["vars"]) == my_vars
+ # vars can be stored as a dict or a json-encoded string depending on dbt version
+ result_vars = dbt_result.result["args"]["vars"]
+ assert result_vars == my_vars or json.loads(result_vars) == my_vars
diff --git a/python_modules/libraries/dagster-dbt/dagster_dbt_tests/test_asset_defs.py b/python_modules/libraries/dagster-dbt/dagster_dbt_tests/test_asset_defs.py
@@ -435,7 +435,7 @@ def foo():
 
 
 def test_dbt_ls_fail_fast():
- with pytest.raises(DagsterDbtCliFatalRuntimeError, match="Invalid --project-dir flag."):
+ with pytest.raises(DagsterDbtCliFatalRuntimeError, match=r"Invalid.*--project-dir"):
  load_assets_from_dbt_project("bad_project_dir", "bad_config_dir")
 
 
@@ -749,7 +749,7 @@ def test_dbt_selections(
 @pytest.mark.parametrize(
  "select,error_match",
  [
- ("tag:nonexist", "No dbt models match"),
+ ("tag:nonexist", r"(No dbt models match|does not match any nodes)"),
  ("asjdlhalskujh:z", "not a valid method name"),
  ],
 )

diff --git a/python_modules/libraries/dagster-dbt/tox.ini b/python_modules/libraries/dagster-dbt/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py{39,38,37,36}-{unix,windows}-{dbt_13X,dbt_14X}
+envlist = py{39,38,37,36}-{unix,windows}-{dbt_13X,dbt_14X,dbt_15X}
 skipsdist = true
 
 [testenv]
@@ -13,10 +13,13 @@ deps =
  dbt_13X: dbt-duckdb==1.3.*
  dbt_14X: dbt-core==1.4.*
  dbt_14X: dbt-duckdb==1.4.*
+ dbt_15X: dbt-core==1.5.*
+ dbt_15X: dbt-duckdb==1.5.*
  -e .[test]
 allowlist_externals =
  /bin/bash
 commands =
  !windows: /bin/bash -c '! pip list --exclude-editable | grep -e dagster -e dagit'
  dbt_13X: pytest -c ../../../pyproject.toml -vv {posargs}
  dbt_14X: pytest -c ../../../pyproject.toml -vv {posargs}
+ dbt_15X: pytest -c ../../../pyproject.toml -vv {posargs}