From a8cdc39e03cf7e981906ccfb13dd146df17ea8b4 Mon Sep 17 00:00:00 2001
From: Gerda Shank
Date: Mon, 5 Oct 2020 14:41:22 -0400
Subject: [PATCH] Save args in run_results.json

---
 CHANGELOG.md                                   |  1 +
 core/dbt/contracts/results.py                  |  3 ++
 core/dbt/contracts/rpc.py                      |  3 ++
 core/dbt/task/runnable.py                      | 30 ++++++++++++++++++-
 .../integration/028_cli_vars/test_cli_vars.py  |  9 ++++++
 .../test_docs_generate.py                      | 24 ++++++++-------
 test/integration/100_rpc_test/test_rpc.py      |  1 +
 7 files changed, 59 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 34f2ef619c3..4d22ebffc55 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@
 - Added ability to snapshot hard-deleted records (opt-in with `invalidate_hard_deletes` config option). ([#249](https://github.com/fishtown-analytics/dbt/issues/249), [#2749](https://github.com/fishtown-analytics/dbt/pull/2749))
 - Improved error messages for YAML selectors ([#2700](https://github.com/fishtown-analytics/dbt/issues/2700), [#2781](https://github.com/fishtown-analytics/dbt/pull/2781))
 - Save manifest at the same time we save the run_results at the end of a run ([#2765](https://github.com/fishtown-analytics/dbt/issues/2765), [#2799](https://github.com/fishtown-analytics/dbt/pull/2799))
+- Save CLI and RPC arguments in run_results.json ([#2510](https://github.com/fishtown-analytics/dbt/issues/2510), [#2813](https://github.com/fishtown-analytics/dbt/pull/2813))
 
 ### Under the hood
 - Added strategy-specific validation to improve the relevancy of compilation errors for the `timestamp` and `check` snapshot strategies. ([#2787](https://github.com/fishtown-analytics/dbt/issues/2787), [#2791](https://github.com/fishtown-analytics/dbt/pull/2791))
diff --git a/core/dbt/contracts/results.py b/core/dbt/contracts/results.py
index fd67532653d..dd4311637b4 100644
--- a/core/dbt/contracts/results.py
+++ b/core/dbt/contracts/results.py
@@ -129,6 +129,7 @@ class RunResultsArtifact(
     ArtifactMixin,
 ):
     results: Sequence[RunResult]
+    args: Dict[str, Any] = field(default_factory=dict)
 
     @classmethod
     def from_node_results(
@@ -136,6 +137,7 @@ def from_node_results(
         results: Sequence[RunResult],
         elapsed_time: float,
         generated_at: datetime,
+        args: Dict,
     ):
         meta = RunResultsMetadata(
             dbt_schema_version=str(cls.dbt_schema_version),
@@ -145,6 +147,7 @@
             metadata=meta,
             results=results,
             elapsed_time=elapsed_time,
+            args=args
         )
diff --git a/core/dbt/contracts/rpc.py b/core/dbt/contracts/rpc.py
index 1270acadfb4..66282d1684a 100644
--- a/core/dbt/contracts/rpc.py
+++ b/core/dbt/contracts/rpc.py
@@ -225,6 +225,7 @@ def error(self):
 
 @dataclass
 @schema_version('remote-execution-result', 1)
 class RemoteExecutionResult(ExecutionResult, RemoteResult):
     results: Sequence[RunResult]
+    args: Dict[str, Any] = field(default_factory=dict)
     generated_at: datetime = field(default_factory=datetime.utcnow)
@@ -233,6 +234,7 @@ def write(self, path: str):
             generated_at=self.generated_at,
             results=self.results,
             elapsed_time=self.elapsed_time,
+            args=self.args,
         )
         writable.write(path)
 
@@ -246,6 +248,7 @@ def from_local_result(
             generated_at=base.metadata.generated_at,
             results=base.results,
             elapsed_time=base.elapsed_time,
+            args=base.args,
             logs=logs,
         )
diff --git a/core/dbt/task/runnable.py b/core/dbt/task/runnable.py
index 0f79b707fc5..7058e7bdad7 100644
--- a/core/dbt/task/runnable.py
+++ b/core/dbt/task/runnable.py
@@ -5,6 +5,7 @@
 from datetime import datetime
 from multiprocessing.dummy import Pool as ThreadPool
 from typing import Optional, Dict, List, Set, Tuple, Iterable, AbstractSet
+from pathlib import PosixPath, WindowsPath
 
 from .printer import (
     print_run_result_error,
@@ -530,11 +531,38 @@ def create_schema(relation: BaseRelation) -> None:
             create_future.result()
 
     def get_result(self, results, elapsed_time, generated_at):
+
         return RunResultsArtifact.from_node_results(
             results=results,
             elapsed_time=elapsed_time,
-            generated_at=generated_at
+            generated_at=generated_at,
+            args=self.args_to_dict(),
         )
 
+    def args_to_dict(self):
+        var_args = vars(self.args)
+        dict_args = {}
+        # remove args keys that clutter up the dictionary
+        for key in var_args:
+            if key == 'cls':
+                continue
+            if var_args[key] is None:
+                continue
+            default_false_keys = (
+                'debug', 'full_refresh', 'fail_fast', 'warn_error',
+                'single_threaded', 'test_new_parser', 'log_cache_events',
+                'strict'
+            )
+            if key in default_false_keys and var_args[key] is False:
+                continue
+            if key == 'vars' and var_args[key] == '{}':
+                continue
+            # convert Path values to str so the args dict is JSON-serializable
+            if (isinstance(var_args[key], PosixPath) or
+                    isinstance(var_args[key], WindowsPath)):
+                var_args[key] = str(var_args[key])
+            dict_args[key] = var_args[key]
+        return dict_args
+
     def task_end_messages(self, results):
         print_run_end_messages(results)
diff --git a/test/integration/028_cli_vars/test_cli_vars.py b/test/integration/028_cli_vars/test_cli_vars.py
index 487daa4083c..d0873b6e107 100644
--- a/test/integration/028_cli_vars/test_cli_vars.py
+++ b/test/integration/028_cli_vars/test_cli_vars.py
@@ -1,5 +1,6 @@
 from test.integration.base import DBTIntegrationTest, use_profile
 import yaml
+import json
 
 
 class TestCLIVars(DBTIntegrationTest):
@@ -57,3 +58,11 @@ def test__postgres_cli_vars_longer(self):
         self.assertEqual(len(results), 1)
         results = self.run_dbt(["test", "--vars", "{simple: abc, unused: def}"])
         self.assertEqual(len(results), 1)
+        run_results = _read_json('./target/run_results.json')
+        self.assertEqual(run_results['args']['vars'], "{simple: abc, unused: def}")
+
+
+def _read_json(path):
+    # read json generated by dbt
+    with open(path) as fp:
+        return json.load(fp)
diff --git a/test/integration/029_docs_generate_tests/test_docs_generate.py b/test/integration/029_docs_generate_tests/test_docs_generate.py
index 21666dd5af2..994b64bed72 100644
--- a/test/integration/029_docs_generate_tests/test_docs_generate.py
+++ b/test/integration/029_docs_generate_tests/test_docs_generate.py
@@ -3333,21 +3333,23 @@ def expected_postgres_references_run_results(self):
         ]
 
     def verify_run_results(self, expected_run_results):
-        run_result = _read_json('./target/run_results.json')
+        run_results = _read_json('./target/run_results.json')
 
-        assert 'metadata' in run_result
-        self.verify_metadata(run_result['metadata'], 'https://schemas.getdbt.com/dbt/run-results/v1.json')
-        self.assertIn('elapsed_time', run_result)
-        self.assertGreater(run_result['elapsed_time'], 0)
+        assert 'metadata' in run_results
+        self.verify_metadata(run_results['metadata'], 'https://schemas.getdbt.com/dbt/run-results/v1.json')
+        self.assertIn('elapsed_time', run_results)
+        self.assertGreater(run_results['elapsed_time'], 0)
         self.assertTrue(
-            isinstance(run_result['elapsed_time'], float),
-            "run_result['elapsed_time'] is of type {}, expected float".format(
-                str(type(run_result['elapsed_time'])))
+            isinstance(run_results['elapsed_time'], float),
+            "run_results['elapsed_time'] is of type {}, expected float".format(
+                str(type(run_results['elapsed_time'])))
         )
+
+        assert 'args' in run_results
         # sort the results so we can make reasonable assertions
-        run_result['results'].sort(key=lambda r: r['node']['unique_id'])
-        assert run_result['results'] == expected_run_results
-        set(run_result) == {'elapsed_time', 'results', 'metadata'}
+        run_results['results'].sort(key=lambda r: r['node']['unique_id'])
+        assert run_results['results'] == expected_run_results
+        assert set(run_results) == {'metadata', 'results', 'elapsed_time', 'args'}
 
     @use_profile('postgres')
     def test__postgres__run_and_generate_no_compile(self):
diff --git a/test/integration/100_rpc_test/test_rpc.py b/test/integration/100_rpc_test/test_rpc.py
index 954dffe805d..42349b4b4d3 100644
--- a/test/integration/100_rpc_test/test_rpc.py
+++ b/test/integration/100_rpc_test/test_rpc.py
@@ -875,6 +875,7 @@ def test_compile_project_cli_postgres(self):
 
     @use_profile('postgres')
     def test_run_project_postgres(self):
         result = self.async_query('run').json()
+        assert 'args' in result['result']
         self.assertHasResults(result, {'descendant_model', 'multi_source_model', 'nonsource_descendant'})
         self.assertTablesEqual('multi_source_model', 'expected_multi_source')
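After this patch, run_results.json carries the invocation arguments under a top-level `args` key, alongside `metadata`, `results`, and `elapsed_time`. A minimal sketch of reading the new key back, assuming dbt's default `./target/` output directory (the same path the tests above read from); since args_to_dict() drops None, default-False, and empty-`vars` values, optional keys may be absent and `.get()` is safer than direct indexing:

    import json

    # run_results.json is written at the end of a run; see get_result() above.
    with open('./target/run_results.json') as fp:
        run_results = json.load(fp)

    # args_to_dict() omits defaulted/empty values, so use .get() for
    # optional argument keys such as 'vars'.
    print(run_results['args'].get('vars', '{}'))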