From 7d5a50dcd55d7fdd785c63512c61ac9a06020d00 Mon Sep 17 00:00:00 2001 From: Gerda Shank Date: Tue, 31 Jan 2023 21:21:59 -0500 Subject: [PATCH] Create protobuf message representations of graph nodes --- .../unreleased/Features-20230131-212702.yaml | 6 + core/dbt/constants.py | 2 + core/dbt/contracts/graph/README.md | 28 + core/dbt/contracts/graph/model_config.py | 91 ++- core/dbt/contracts/graph/nodes.proto | 588 +++++++++++++ core/dbt/contracts/graph/nodes.py | 244 +++++- core/dbt/contracts/graph/proto_nodes.py | 772 ++++++++++++++++++ core/dbt/contracts/graph/unparsed.py | 48 ++ core/dbt/contracts/graph/utils.py | 19 + core/dbt/events/README.md | 4 + core/dbt/events/types.proto | 2 - core/dbt/parser/manifest.py | 8 +- core/dbt/task/parse.py | 0 core/dbt/utils.py | 7 + core/setup.py | 2 +- dev-requirements.txt | 2 +- tests/unit/test_proto_nodes.py | 378 +++++++++ 17 files changed, 2182 insertions(+), 19 deletions(-) create mode 100644 .changes/unreleased/Features-20230131-212702.yaml create mode 100644 core/dbt/contracts/graph/README.md create mode 100644 core/dbt/contracts/graph/nodes.proto create mode 100644 core/dbt/contracts/graph/proto_nodes.py delete mode 100644 core/dbt/task/parse.py create mode 100644 tests/unit/test_proto_nodes.py diff --git a/.changes/unreleased/Features-20230131-212702.yaml b/.changes/unreleased/Features-20230131-212702.yaml new file mode 100644 index 00000000000..7d7a27868f0 --- /dev/null +++ b/.changes/unreleased/Features-20230131-212702.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Crate protobuf representation of nodes +time: 2023-01-31T21:27:02.209758-05:00 +custom: + Author: gshank + Issue: "6391" diff --git a/core/dbt/constants.py b/core/dbt/constants.py index 63213476e54..a055db3a571 100644 --- a/core/dbt/constants.py +++ b/core/dbt/constants.py @@ -8,3 +8,5 @@ PIN_PACKAGE_URL = ( "https://docs.getdbt.com/docs/package-management#section-specifying-package-versions" ) + +MANIFEST_FILE_NAME = "manifest.json" diff --git a/core/dbt/contracts/graph/README.md b/core/dbt/contracts/graph/README.md new file mode 100644 index 00000000000..964c4ca8b9f --- /dev/null +++ b/core/dbt/contracts/graph/README.md @@ -0,0 +1,28 @@ +# nodes.proto messages + +For new fields in a node or node config to be included in the protobuf serialized output, +the messages in nodes.proto need to be updated. + +Then proto.nodes need to be compiled: ```protoc --python_betterproto_out . nodes.proto``` + +In order to use optional fields (really necessary for nodes and configs) we had to use +a beta version of betterproto. This version has a bug in the way that it writes the +names of some generated classes, so we will have to update the name of the rpc node from +RpcNode to RPCNode in the generated file. + +In addition, betterproto now always creates the generated python file as an __init__.py +file in a subdirectory. For now, I'm moving it up from proto_nodes/__init__.py to proto_nodes.py. + +# updating nodes.py and model_config.py for nodes.proto changes + +Protobuf python messages objects are created to "to_msg" methods. There is often a list +of attributes to set in a "msg_attributes" method, but this isn't entirely consistent. +If a class has a small number of additional attributes they are sometimes set directly. +Some attributes aren't handled well by the "get_msg_attribute_value" utility function, +in which case they are set directly. This is particularly true of lists or dictionaries +of objects, which need to be converted using a "to_msg" method. + +The utility class "get_msg_attribute_value" does a couple of common conversions of +attribute values, such as getting the string value of an enum or converting dictionaries to +dictionaries of strings. A few common more elaborate conversions are also performed, such as +"columns". diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 407c5435786..8c608aa2a0a 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -8,11 +8,12 @@ register_pattern, ) from dbt.contracts.graph.unparsed import AdditionalPropertiesAllowed, Docs -from dbt.contracts.graph.utils import validate_color +from dbt.contracts.graph.utils import validate_color, get_msg_attribute_value from dbt.exceptions import DbtInternalError, CompilationError from dbt.contracts.util import Replaceable, list_str from dbt import hooks from dbt.node_types import NodeType +from dbt.contracts.graph import proto_nodes M = TypeVar("M", bound="Metadata") @@ -195,6 +196,9 @@ class Hook(dbtClassMixin, Replaceable): transaction: bool = True index: Optional[int] = None + def to_msg(self): + return proto_nodes.Hook(sql=self.sql, transaction=self.transaction, index=self.index) + T = TypeVar("T", bound="BaseConfig") @@ -404,9 +408,15 @@ class NodeAndTestConfig(BaseConfig): metadata=MergeBehavior.Update.meta(), ) + @classmethod + def msg_attributes(self): + return ["enabled", "alias", "schema", "database", "tags", "meta"] + @dataclass class NodeConfig(NodeAndTestConfig): + """This config is used by ModelNode, AnalysisNode, RPCNode, SqlNode, HookNode""" + # Note: if any new fields are added with MergeBehavior, also update the # 'mergebehavior' dictionary materialized: str = "view" @@ -447,6 +457,35 @@ class NodeConfig(NodeAndTestConfig): metadata=MergeBehavior.Update.meta(), ) + @classmethod + def msg_attributes(self): + return [ + "materialized", + "incremental_strategy", + "persist_docs", + "quoting", + "full_refresh", + "unique_key", + "on_schema_change", + "grants", + "packages", + "docs", + ] + + def to_msg(self): + # Get matching msg config class + config_name = type(self).__name__ + msg_cls = getattr(proto_nodes, config_name) + msg_config = msg_cls() + + for cls in [NodeAndTestConfig, NodeConfig]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg_config, attribute, value) + msg_config.post_hook = [hk.to_msg() for hk in self.post_hook] + msg_config.pre_hook = [hk.to_msg() for hk in self.pre_hook] + return msg_config + # we validate that node_color has a suitable value to prevent dbt-docs from crashing def __post_init__(self): if self.docs.node_color: @@ -495,6 +534,11 @@ class SeedConfig(NodeConfig): materialized: str = "seed" quote_columns: Optional[bool] = None + def to_msg(self): + msg = super().to_msg() + msg.quote_columns = self.quote_columns + return msg + @classmethod def validate(cls, data): super().validate(data) @@ -502,8 +546,11 @@ def validate(cls, data): raise ValidationError("A seed must have a materialized value of 'seed'") +# This is used in both GenericTestNode and SingularTestNode, but some +# of these attributes seem specific to GenericTestNode. @dataclass class TestConfig(NodeAndTestConfig): + __test__ = False # this is repeated because of a different default schema: Optional[str] = field( default="dbt_test__audit", @@ -518,6 +565,27 @@ class TestConfig(NodeAndTestConfig): warn_if: str = "!= 0" error_if: str = "!= 0" + @classmethod + def msg_attributes(self): + return [ + "materialized", + "severity", + "store_failures", + "where", + "limit", + "fail_calc", + "warn_if", + "error_if", + ] + + def to_msg(self): + msg_config = proto_nodes.TestConfig() + for cls in [NodeAndTestConfig, TestConfig]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg_config, attribute, value) + return msg_config + @classmethod def same_contents(cls, unrendered: Dict[str, Any], other: Dict[str, Any]) -> bool: """This is like __eq__, except it explicitly checks certain fields.""" @@ -563,6 +631,27 @@ class SnapshotConfig(EmptySnapshotConfig): # Not using Optional because of serialization issues with a Union of str and List[str] check_cols: Union[str, List[str], None] = None + @classmethod + def msg_attributes(self): + return ["strategy", "unique_key", "target_schema", "target_database"] + + def to_msg(self): + msg_config = super().to_msg() # Uses NodeConfig to_msg + for attribute in self.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg_config, attribute, value) + msg_config.check_cols = self.normalize_check_cols() + return msg_config + + def normalize_check_cols(self): + """Ensure that check_cols is always a list""" + if self.check_cols is None: + return [] + elif isinstance(self.check_cols, str): + return [self.check_cols] + else: + return self.check_cols + @classmethod def validate(cls, data): super().validate(data) diff --git a/core/dbt/contracts/graph/nodes.proto b/core/dbt/contracts/graph/nodes.proto new file mode 100644 index 00000000000..b18d40fb54e --- /dev/null +++ b/core/dbt/contracts/graph/nodes.proto @@ -0,0 +1,588 @@ +syntax = "proto3"; + +package proto_nodes; + + +message ListOfStrings { + repeated string value = 1; +} + +message Hook { + string sql = 1; + bool transaction = 2; + optional int index = 3; +} + +message Docs { + bool show = 1; + optional string node_color = 2; +} + +message MacroDependsOn { + repeated string macros = 1; +} + +message DependsOn { + repeated string macros = 1; + repeated string nodes = 2; +} + +message MacroArgument { + string name = 1; + optional string type = 2; + string description = 3; +} + +message Quoting { + optional string database = 1; + optional string schema = 2; + optional string identifier = 3; + optional string column = 4; +} + +message Time { + optional int32 count = 1; + optional string period = 2; +} + +message FreshnessThreshold { + optional Time warn_after = 1; + optional Time error_after = 2; + optional string filter = 3; +} + +message ExposureOwner { + string email = 1; + optional string name = 2; +} + +message MetricFilter { + string field = 1; + string operator = 2; + string value = 3; +} + +message MetricTime { + optional int32 count = 1; + optional string period = 2; +} + +message NodeConfig { + bool enabled = 1; + optional string alias = 2; + optional string schema = 3; + optional string database = 4; + repeated string tags = 5; + map meta = 6; + string materialized = 7; + optional string incremental_strategy = 8; + map persist_docs = 9; + Hook post_hook = 10; + Hook pre_hook = 11; + map quoting = 12; + map column_types = 13; + optional bool full_refresh = 14; + optional string unique_key = 15; + string on_schema_change = 16; + map grants = 17; + repeated string packages = 18; + Docs docs = 19; +} + +message SeedConfig { + bool enabled = 1; + optional string alias = 2; + optional string schema = 3; + optional string database = 4; + repeated string tags = 5; + map meta = 6; + string materialized = 7; + optional string incremental_strategy = 8; + map persist_docs = 9; + // post_hook = 10; + // pre_hook = 11; + map quoting = 12; + map column_types = 13; + optional bool full_refresh = 14; + optional string unique_key = 15; + string on_schema_change = 16; + map grants = 17; + repeated string packages = 18; + Docs docs = 19; + optional bool quote_columns = 20; +} + +message TestConfig { + bool enabled = 1; + optional string alias = 2; + optional string schema = 3; + optional string database = 4; + repeated string tags = 5; + map meta = 6; + string materialized = 8; + string severity = 9; + optional bool store_failures = 10; + optional string where = 11; + optional int32 limit = 12; + string fail_calc = 13; + string warn_if = 14; + string error_if = 15; +} + +// NodeConfig plus strategy, target_schema, target_database, check_cols +message SnapshotConfig { + bool enabled = 1; + optional string alias = 2; + optional string schema = 3; + optional string database = 4; + repeated string tags = 5; + map meta = 6; + string materialized = 7; + optional string incremental_strategy = 8; + map persist_docs = 9; + // post_hook = 10; + // pre_hook = 11; + map quoting = 12; + map column_types = 13; + optional bool full_refresh = 14; + optional string unique_key = 15; + string on_schema_change = 16; + map grants = 17; + repeated string packages = 18; + Docs docs = 19; + optional string strategy = 20; + optional string target_schema = 21; + optional string target_database = 22; + repeated string check_cols = 23; +} + +message SourceConfig { + bool enabled = 1; +} + +message ExposureConfig { + bool enabled = 1; +} + +message MetricConfig { + bool enabled = 1; +} + +message ColumnInfo { + string name = 1; + string description = 2; + map meta = 3; + optional string data_type = 4; + optional bool quote = 5; + repeated string tags = 6; + map _extra = 7; +} + + +// There are three nodes that are exactly the same as this one (AnalysisNode, RPCNode, and +// SqlNOde), and one with only one additional attribute (HookNode). Making separate messages +// for now, but we can revisit later. If we clean up the config classes, some of those +// nodes might end up with different config classes, which would require separate node messages. +message ModelNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + NodeConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + +// This should be exactly the same as ModelNode +message AnalysisNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + NodeConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + +// This should be exactly the same as ModelNode +message RPCNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + NodeConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + + +// This should be exactly the same as ModelNode +message SqlNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + NodeConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + +// This should be the same as ModelNode plus additional "index" attribute +message HookNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + NodeConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; + optional int32 index = 29; +} + + +message SeedNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + SeedConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string root_path = 22; +} + + +// Same as ModelNode except config is TestConfig +message SingularTestNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + TestConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + +message TestMetadata { + string name = 1; + map kwargs = 2; + optional string namespace = 3; +} + +// Same as ModelNode except config is TestConfig, and has test_metadata +// and column_name attributes. +message GenericTestNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + TestConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; + TestMetadata test_metadata = 29; + optional string column_name = 30; +} + +// SnapshotNode - Sames as ModelNode except with SnapshotConfig +message SnapshotNode { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string alias = 10; + SnapshotConfig config = 11; + repeated string tags = 12; + string description = 13; + map columns = 14; + map meta = 15; + Docs docs = 16; + string patch_path = 17; + bool deferred = 18; + map unrendered_config = 19; + string relation_name = 20; + string raw_code = 21; + string language = 22; + repeated ListOfStrings refs = 23; + repeated ListOfStrings sources = 24; + repeated ListOfStrings metrics = 25; + DependsOn depends_on = 26; + bool compiled = 27; + string compiled_code = 28; +} + +// Macro - BaseNode plus additional attributes +message Macro { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + string macro_sql = 7; + MacroDependsOn depends_on = 8; + string description = 9; + map meta = 10; + Docs docs = 11; + optional string patch_path = 12; + repeated MacroArgument arguments = 13; + repeated string supported_languages = 14; +} + +// Documentation - BaseNode plus block_contents +message Documentation { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + string block_contents = 7; +} + +message SourceDefinition { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + optional string database = 8; + string schema = 9; + string source_name = 10; + string source_description = 11; + string loader = 12; + string identifier = 13; + Quoting quoting = 14; + optional string loaded_at_field = 15; + optional FreshnessThreshold freshness = 16; + // optional ExternalTable external = 17; + string description = 18; + map columns = 19; + map meta = 20; + map source_meta = 21; + repeated string tags = 22; + SourceConfig config = 23; + optional string patch_path = 24; + map unrendered_config = 25; + optional string relation_name = 26; +} + +message Exposure { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + string type = 8; + ExposureOwner owner = 9; + string description = 10; + optional string label = 11; + optional string maturity = 12; + map meta = 13; + repeated string tags = 14; + ExposureConfig config = 15; + map unrendered_config = 16; + optional string url = 17; + DependsOn depends_on = 18; + repeated ListOfStrings refs = 24; + repeated ListOfStrings sources = 25; + repeated ListOfStrings metrics = 26; +} + +message Metric { + string name = 1; + string resource_type = 2; + string package_name = 3; + string path = 4; + string original_file_path = 5; + string unique_id = 6; + repeated string fqn = 7; + string description = 8; + string label = 9; + string calculation_method = 10; + string expression = 11; + repeated MetricFilter filters = 12; + repeated string time_grains = 13; + repeated string dimensions = 14; + optional string timestamp = 15; + optional MetricTime window = 16; + optional string model = 17; + optional string model_unique_id = 18; + map meta = 19; + repeated string tags = 20; + MetricConfig config = 21; + map unrendered_config = 22; + repeated ListOfStrings refs = 24; + repeated ListOfStrings sources = 25; + repeated ListOfStrings metrics = 26; + DependsOn depends_on = 27; +} diff --git a/core/dbt/contracts/graph/nodes.py b/core/dbt/contracts/graph/nodes.py index b0b6d85655e..4fc986fe149 100644 --- a/core/dbt/contracts/graph/nodes.py +++ b/core/dbt/contracts/graph/nodes.py @@ -35,6 +35,7 @@ MetricTime, ) from dbt.contracts.util import Replaceable, AdditionalPropertiesMixin +from dbt.contracts.graph.utils import get_msg_attribute_value from dbt.events.proto_types import NodeInfo from dbt.events.functions import warn_or_error from dbt.exceptions import ParsingError @@ -49,6 +50,7 @@ from dbt.node_types import ModelLanguage, NodeType from dbt.utils import cast_dict_to_dict_of_strings +from dbt.contracts.graph import proto_nodes from .model_config import ( NodeConfig, @@ -95,6 +97,18 @@ class BaseNode(dbtClassMixin, Replaceable): original_file_path: str unique_id: str + @classmethod + def msg_attributes(self): + """Attributes of this class that are included in protobuf definitions""" + return [ + "name", + "resource_type", + "package_name", + "path", + "original_file_path", + "unique_id", + ] + @property def search_name(self): return self.name @@ -134,6 +148,10 @@ class GraphNode(BaseNode): fqn: List[str] + @classmethod + def msg_attributes(self): + return ["fqn"] + def same_fqn(self, other) -> bool: return self.fqn == other.fqn @@ -150,6 +168,18 @@ class ColumnInfo(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable tags: List[str] = field(default_factory=list) _extra: Dict[str, Any] = field(default_factory=dict) + def to_msg(self): + column_info_msg = proto_nodes.ColumnInfo( + name=self.name, + description=self.description, + meta=cast_dict_to_dict_of_strings(self.meta), + data_type=self.data_type, + quote=self.quote, + tags=self.tags, + _extra=cast_dict_to_dict_of_strings(self._extra), + ) + return column_info_msg + # Metrics, exposures, @dataclass @@ -179,6 +209,9 @@ def add_macro(self, value: str): if value not in self.macros: self.macros.append(value) + def to_msg(self): + return proto_nodes.MacroDependsOn(macros=self.macros) + @dataclass class DependsOn(MacroDependsOn): @@ -188,13 +221,20 @@ def add_node(self, value: str): if value not in self.nodes: self.nodes.append(value) + def to_msg(self): + return proto_nodes.DependsOn(nodes=self.nodes, macros=self.macros) + @dataclass class ParsedNodeMandatory(GraphNode, HasRelationMetadata, Replaceable): alias: str - checksum: FileHash + checksum: FileHash # not included in protobuf messages config: NodeConfig = field(default_factory=NodeConfig) + @classmethod + def msg_attributes(self): + return ["fqn", "database", "schema", "alias", "config"] + @property def identifier(self): return self.alias @@ -249,6 +289,22 @@ class ParsedNode(NodeInfoMixin, ParsedNodeMandatory, SerializableType): relation_name: Optional[str] = None raw_code: str = "" + @classmethod + def msg_attributes(self): + # Does not included created_at, config_call_dict, build_path + return [ + "tags", + "description", + "columns", + "meta", + "docs", + "patch_path", + "deferred", + "unrendered_config", + "relation_name", + "raw_code", + ] + def write_node(self, target_path: str, subdirectory: str, payload: str): if os.path.basename(self.path) == os.path.basename(self.original_file_path): # One-to-one relationship of nodes to files. @@ -401,6 +457,19 @@ class CompiledNode(ParsedNode): extra_ctes: List[InjectedCTE] = field(default_factory=list) _pre_injected_sql: Optional[str] = None + @classmethod + def msg_attributes(self): + # Does not include extra_ctes_injected, extra_ctes, _pre_injected_sql, compiled_path + return [ + "language", + "refs", + "sources", + "metrics", + "depends_on", + "compiled", + "compiled_code", + ] + @property def empty(self): return not self.raw_code.strip() @@ -438,6 +507,19 @@ def depends_on_nodes(self): def depends_on_macros(self): return self.depends_on.macros + def to_msg(self): + + # Get matching msg node class + node_name = type(self).__name__ + msg_cls = getattr(proto_nodes, node_name) + msg_node = msg_cls() + + for cls in [BaseNode, GraphNode, ParsedNodeMandatory, ParsedNode, CompiledNode]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg_node, attribute, value) + return msg_node + # ==================================== # CompiledNode subclasses @@ -454,13 +536,18 @@ class HookNode(CompiledNode): resource_type: NodeType = field(metadata={"restrict": [NodeType.Operation]}) index: Optional[int] = None + def to_msg(self): + msg = super().to_msg() + msg.index = self.index + return msg + @dataclass class ModelNode(CompiledNode): resource_type: NodeType = field(metadata={"restrict": [NodeType.Model]}) -# TODO: rm? +# TODO: this node type should probably be removed when the rpc server is no longer supported @dataclass class RPCNode(CompiledNode): resource_type: NodeType = field(metadata={"restrict": [NodeType.RPCCall]}) @@ -485,6 +572,15 @@ class SeedNode(ParsedNode): # No SQLDefaults! root_path: Optional[str] = None depends_on: MacroDependsOn = field(default_factory=MacroDependsOn) + def to_msg(self): + seed_node = proto_nodes.SeedNode() + for cls in [BaseNode, ParsedNodeMandatory, ParsedNode]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(seed_node, attribute, value) + seed_node.root_path = self.root_path + return seed_node + def same_seeds(self, other: "SeedNode") -> bool: # for seeds, we check the hashes. If the hashes are different types, # no match. If the hashes are both the same 'path', log a warning and @@ -608,6 +704,8 @@ class SingularTestNode(TestShouldStoreFailures, CompiledNode): # refactor the various configs. config: TestConfig = field(default_factory=TestConfig) # type: ignore + # no to_msg method because the CompiledNode default works fine + @property def test_node_type(self): return "singular" @@ -620,6 +718,7 @@ def test_node_type(self): @dataclass class TestMetadata(dbtClassMixin, Replaceable): + __test__ = False name: str # kwargs are the args that are left in the test builder after # removing configs. They are set from the test builder when @@ -639,11 +738,16 @@ class HasTestMetadata(dbtClassMixin): class GenericTestNode(TestShouldStoreFailures, CompiledNode, HasTestMetadata): resource_type: NodeType = field(metadata={"restrict": [NodeType.Test]}) column_name: Optional[str] = None - file_key_name: Optional[str] = None + file_key_name: Optional[str] = None # Not included in protobuf message # Was not able to make mypy happy and keep the code working. We need to # refactor the various configs. config: TestConfig = field(default_factory=TestConfig) # type: ignore + def to_msg(self): + msg = super().to_msg() + msg.column_name = self.column_name + return msg + def same_contents(self, other) -> bool: if other is None: return False @@ -697,6 +801,29 @@ class Macro(BaseNode): created_at: float = field(default_factory=lambda: time.time()) supported_languages: Optional[List[ModelLanguage]] = None + @classmethod + def msg_attributes(self): + return ["macro_sql", "depends_on", "description", "meta", "docs", "patch_path"] + + def to_msg(self): + msg = proto_nodes.Macro() + for cls in [BaseNode, Macro]: + for attribute in cls.msg_attributes(): + value = getattr(self, attribute) + if value is None: + continue + setattr(msg, attribute, value) + arguments = [ + proto_nodes.MacroArgument(name=ma.name, description=ma.description, type=ma.type) + for ma in self.arguments + ] + msg.arguments = arguments + supported_languages = [] + if isinstance(self.supported_languages, list): + supported_languages = [ml.value for ml in self.supported_languages] + msg.supported_languages = supported_languages + return msg + def patch(self, patch: "ParsedMacroPatch"): self.patch_path: Optional[str] = patch.file_id self.description = patch.description @@ -727,6 +854,15 @@ class Documentation(BaseNode): block_contents: str resource_type: NodeType = field(metadata={"restrict": [NodeType.Documentation]}) + def to_msg(self): + msg = proto_nodes.Documentation() + for cls in [BaseNode]: + for attribute in cls.msg_attributes(): + value = getattr(self, attribute) + setattr(msg, attribute, value) + msg.block_contents = self.block_contents + return msg + @property def search_name(self): return self.name @@ -821,6 +957,39 @@ class SourceDefinition(NodeInfoMixin, ParsedSourceMandatory): relation_name: Optional[str] = None created_at: float = field(default_factory=lambda: time.time()) + @classmethod + def msg_attributes(self): + return [ + "source_name", + "source_description", + "loader", + "identifier", + "resource_type", + "quoting", + "loaded_at_field", + "freshness", + "external", + "description", + "columns", + "meta", + "source_meta", + "tags", + "config", + "patch_path", + "unrendered_config", + "relation_name", + "database", + "schema", + ] + + def to_msg(self): + msg = proto_nodes.SourceDefinition() + for cls in [BaseNode, GraphNode, SourceDefinition]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg, attribute, value) + return msg + def __post_serialize__(self, dct): if "_event_status" in dct: del dct["_event_status"] @@ -943,6 +1112,32 @@ class Exposure(GraphNode): metrics: List[List[str]] = field(default_factory=list) created_at: float = field(default_factory=lambda: time.time()) + @classmethod + def msg_attributes(self): + return [ + "type", + "description", + "label", + "maturity", + "meta", + "tags", + "config", + "unrendered_config", + "url", + "depends_on", + "refs", + "sources", + "metrics", + ] + + def to_msg(self): + msg = proto_nodes.Exposure() + for cls in [BaseNode, GraphNode, Exposure]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg, attribute, value) + return msg + @property def depends_on_nodes(self): return self.depends_on.nodes @@ -1003,15 +1198,8 @@ def same_contents(self, old: Optional["Exposure"]) -> bool: # ==================================== -@dataclass -class MetricReference(dbtClassMixin, Replaceable): - sql: Optional[Union[str, int]] - unique_id: Optional[str] - - @dataclass class Metric(GraphNode): - name: str description: str label: str calculation_method: str @@ -1028,12 +1216,44 @@ class Metric(GraphNode): tags: List[str] = field(default_factory=list) config: MetricConfig = field(default_factory=MetricConfig) unrendered_config: Dict[str, Any] = field(default_factory=dict) - sources: List[List[str]] = field(default_factory=list) - depends_on: DependsOn = field(default_factory=DependsOn) refs: List[List[str]] = field(default_factory=list) + sources: List[List[str]] = field(default_factory=list) metrics: List[List[str]] = field(default_factory=list) + depends_on: DependsOn = field(default_factory=DependsOn) created_at: float = field(default_factory=lambda: time.time()) + @classmethod + def msg_attributes(self): + return [ + "description", + "label", + "calculation_method", + "expression", + "time_grains", + "dimensions", + "timestamp", + "window", + "model", + "model_unique_id", + "meta", + "tags", + "config", + "unrendered_config", + "refs", + "sources", + "metrics", + "depends_on", + ] + + def to_msg(self): + msg = proto_nodes.Metric() + for cls in [BaseNode, GraphNode, Metric]: + for attribute in cls.msg_attributes(): + value = get_msg_attribute_value(self, attribute) + setattr(msg, attribute, value) + msg.filters = [mf.to_msg() for mf in self.filters] + return msg + @property def depends_on_nodes(self): return self.depends_on.nodes diff --git a/core/dbt/contracts/graph/proto_nodes.py b/core/dbt/contracts/graph/proto_nodes.py new file mode 100644 index 00000000000..8886d108c79 --- /dev/null +++ b/core/dbt/contracts/graph/proto_nodes.py @@ -0,0 +1,772 @@ +# Generated by the protocol buffer compiler. DO NOT EDIT! +# sources: nodes.proto +# plugin: python-betterproto +from dataclasses import dataclass +from typing import ( + Dict, + List, + Optional, +) + +import betterproto + + +@dataclass(eq=False, repr=False) +class ListOfStrings(betterproto.Message): + value: List[str] = betterproto.string_field(1) + + +@dataclass(eq=False, repr=False) +class Docs(betterproto.Message): + show: bool = betterproto.bool_field(1) + node_color: str = betterproto.string_field(2) + + +@dataclass(eq=False, repr=False) +class MacroDependsOn(betterproto.Message): + macros: List[str] = betterproto.string_field(1) + + +@dataclass(eq=False, repr=False) +class DependsOn(betterproto.Message): + macros: List[str] = betterproto.string_field(1) + nodes: List[str] = betterproto.string_field(2) + + +@dataclass(eq=False, repr=False) +class MacroArgument(betterproto.Message): + name: str = betterproto.string_field(1) + type: Optional[str] = betterproto.string_field(2, optional=True, group="_type") + description: str = betterproto.string_field(3) + + +@dataclass(eq=False, repr=False) +class Quoting(betterproto.Message): + database: Optional[str] = betterproto.string_field(1, optional=True, group="_database") + schema: Optional[str] = betterproto.string_field(2, optional=True, group="_schema") + identifier: Optional[str] = betterproto.string_field(3, optional=True, group="_identifier") + column: Optional[str] = betterproto.string_field(4, optional=True, group="_column") + + +@dataclass(eq=False, repr=False) +class Time(betterproto.Message): + count: Optional[int] = betterproto.int32_field(1, optional=True, group="_count") + period: Optional[str] = betterproto.string_field(2, optional=True, group="_period") + + +@dataclass(eq=False, repr=False) +class FreshnessThreshold(betterproto.Message): + warn_after: Optional["Time"] = betterproto.message_field(1, optional=True, group="_warn_after") + error_after: Optional["Time"] = betterproto.message_field( + 2, optional=True, group="_error_after" + ) + filter: Optional[str] = betterproto.string_field(3, optional=True, group="_filter") + + +@dataclass(eq=False, repr=False) +class ExposureOwner(betterproto.Message): + email: str = betterproto.string_field(1) + name: Optional[str] = betterproto.string_field(2, optional=True, group="_name") + + +@dataclass(eq=False, repr=False) +class MetricFilter(betterproto.Message): + field: str = betterproto.string_field(1) + operator: str = betterproto.string_field(2) + value: str = betterproto.string_field(3) + + +@dataclass(eq=False, repr=False) +class MetricTime(betterproto.Message): + count: Optional[int] = betterproto.int32_field(1, optional=True, group="_count") + period: Optional[str] = betterproto.string_field(2, optional=True, group="_period") + + +@dataclass(eq=False, repr=False) +class NodeConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + alias: Optional[str] = betterproto.string_field(2, optional=True, group="_alias") + schema: Optional[str] = betterproto.string_field(3, optional=True, group="_schema") + database: Optional[str] = betterproto.string_field(4, optional=True, group="_database") + tags: List[str] = betterproto.string_field(5) + meta: Dict[str, str] = betterproto.map_field( + 6, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + materialized: str = betterproto.string_field(7) + incremental_strategy: Optional[str] = betterproto.string_field( + 8, optional=True, group="_incremental_strategy" + ) + persist_docs: Dict[str, str] = betterproto.map_field( + 9, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + quoting: Dict[str, str] = betterproto.map_field( + 12, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + """post_hook = 10; pre_hook = 11;""" + + column_types: Dict[str, str] = betterproto.map_field( + 13, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + full_refresh: Optional[bool] = betterproto.bool_field(14, optional=True, group="_full_refresh") + unique_key: Optional[str] = betterproto.string_field(15, optional=True, group="_unique_key") + on_schema_change: str = betterproto.string_field(16) + grants: Dict[str, str] = betterproto.map_field( + 17, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + packages: List[str] = betterproto.string_field(18) + docs: "Docs" = betterproto.message_field(19) + + +@dataclass(eq=False, repr=False) +class SeedConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + alias: Optional[str] = betterproto.string_field(2, optional=True, group="_alias") + schema: Optional[str] = betterproto.string_field(3, optional=True, group="_schema") + database: Optional[str] = betterproto.string_field(4, optional=True, group="_database") + tags: List[str] = betterproto.string_field(5) + meta: Dict[str, str] = betterproto.map_field( + 6, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + materialized: str = betterproto.string_field(7) + incremental_strategy: Optional[str] = betterproto.string_field( + 8, optional=True, group="_incremental_strategy" + ) + persist_docs: Dict[str, str] = betterproto.map_field( + 9, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + quoting: Dict[str, str] = betterproto.map_field( + 12, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + """post_hook = 10; pre_hook = 11;""" + + column_types: Dict[str, str] = betterproto.map_field( + 13, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + full_refresh: Optional[bool] = betterproto.bool_field(14, optional=True, group="_full_refresh") + unique_key: Optional[str] = betterproto.string_field(15, optional=True, group="_unique_key") + on_schema_change: str = betterproto.string_field(16) + grants: Dict[str, str] = betterproto.map_field( + 17, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + packages: List[str] = betterproto.string_field(18) + docs: "Docs" = betterproto.message_field(19) + quote_columns: Optional[bool] = betterproto.bool_field( + 20, optional=True, group="_quote_columns" + ) + + +@dataclass(eq=False, repr=False) +class TestConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + alias: Optional[str] = betterproto.string_field(2, optional=True, group="_alias") + schema: Optional[str] = betterproto.string_field(3, optional=True, group="_schema") + database: Optional[str] = betterproto.string_field(4, optional=True, group="_database") + tags: List[str] = betterproto.string_field(5) + meta: Dict[str, str] = betterproto.map_field( + 6, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + materialized: str = betterproto.string_field(8) + severity: str = betterproto.string_field(9) + store_failures: Optional[bool] = betterproto.bool_field( + 10, optional=True, group="_store_failures" + ) + where: Optional[str] = betterproto.string_field(11, optional=True, group="_where") + limit: Optional[int] = betterproto.int32_field(12, optional=True, group="_limit") + fail_calc: str = betterproto.string_field(13) + warn_if: str = betterproto.string_field(14) + error_if: str = betterproto.string_field(15) + + +@dataclass(eq=False, repr=False) +class SnapshotConfig(betterproto.Message): + """NodeConfig plus strategy, target_schema, target_database, check_cols""" + + enabled: bool = betterproto.bool_field(1) + alias: Optional[str] = betterproto.string_field(2, optional=True, group="_alias") + schema: Optional[str] = betterproto.string_field(3, optional=True, group="_schema") + database: Optional[str] = betterproto.string_field(4, optional=True, group="_database") + tags: List[str] = betterproto.string_field(5) + meta: Dict[str, str] = betterproto.map_field( + 6, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + materialized: str = betterproto.string_field(7) + incremental_strategy: Optional[str] = betterproto.string_field( + 8, optional=True, group="_incremental_strategy" + ) + persist_docs: Dict[str, str] = betterproto.map_field( + 9, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + quoting: Dict[str, str] = betterproto.map_field( + 12, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + """post_hook = 10; pre_hook = 11;""" + + column_types: Dict[str, str] = betterproto.map_field( + 13, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + full_refresh: Optional[bool] = betterproto.bool_field(14, optional=True, group="_full_refresh") + unique_key: Optional[str] = betterproto.string_field(15, optional=True, group="_unique_key") + on_schema_change: str = betterproto.string_field(16) + grants: Dict[str, str] = betterproto.map_field( + 17, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + packages: List[str] = betterproto.string_field(18) + docs: "Docs" = betterproto.message_field(19) + strategy: Optional[str] = betterproto.string_field(20, optional=True, group="_strategy") + target_schema: Optional[str] = betterproto.string_field( + 21, optional=True, group="_target_schema" + ) + target_database: Optional[str] = betterproto.string_field( + 22, optional=True, group="_target_database" + ) + check_cols: List[str] = betterproto.string_field(23) + + +@dataclass(eq=False, repr=False) +class SourceConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + + +@dataclass(eq=False, repr=False) +class ExposureConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + + +@dataclass(eq=False, repr=False) +class MetricConfig(betterproto.Message): + enabled: bool = betterproto.bool_field(1) + + +@dataclass(eq=False, repr=False) +class ColumnInfo(betterproto.Message): + name: str = betterproto.string_field(1) + description: str = betterproto.string_field(2) + meta: Dict[str, str] = betterproto.map_field( + 3, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + data_type: str = betterproto.string_field(4) + quote: Optional[bool] = betterproto.bool_field(5, optional=True, group="_quote") + tags: List[str] = betterproto.string_field(6) + extra: Dict[str, str] = betterproto.map_field( + 7, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + + +@dataclass(eq=False, repr=False) +class ModelNode(betterproto.Message): + """ + There are three nodes that are exactly the same as this one (AnalysisNode, + RPCNode, and SqlNOde), and one with only one additional attribute + (HookNode). Making separate messages for now, but we can revisit later. If + we clean up the config classes, some of those nodes might end up with + different config classes, which would require separate node messages. + """ + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "NodeConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class AnalysisNode(betterproto.Message): + """This should be exactly the same as ModelNode""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "NodeConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class RPCNode(betterproto.Message): + """This should be exactly the same as ModelNode""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "NodeConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class SqlNode(betterproto.Message): + """This should be exactly the same as ModelNode""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "NodeConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class HookNode(betterproto.Message): + """ + This should be the same as ModelNode plus additional "index" attribute + """ + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "NodeConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + index: Optional[int] = betterproto.int32_field(29, optional=True, group="_index") + + +@dataclass(eq=False, repr=False) +class SeedNode(betterproto.Message): + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "SeedConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + root_path: str = betterproto.string_field(22) + + +@dataclass(eq=False, repr=False) +class SingularTestNode(betterproto.Message): + """Same as ModelNode except config is TestConfig""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "TestConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class TestMetadata(betterproto.Message): + name: str = betterproto.string_field(1) + kwargs: Dict[str, str] = betterproto.map_field( + 2, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + namespace: Optional[str] = betterproto.string_field(3, optional=True, group="_namespace") + + +@dataclass(eq=False, repr=False) +class GenericTestNode(betterproto.Message): + """ + Same as ModelNode except config is TestConfig, and has test_metadata and + column_name attributes. + """ + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "TestConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + test_metadata: "TestMetadata" = betterproto.message_field(29) + column_name: Optional[str] = betterproto.string_field(30, optional=True, group="_column_name") + + +@dataclass(eq=False, repr=False) +class SnapshotNode(betterproto.Message): + """SnapshotNode - Sames as ModelNode except with SnapshotConfig""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + alias: str = betterproto.string_field(10) + config: "SnapshotConfig" = betterproto.message_field(11) + tags: List[str] = betterproto.string_field(12) + description: str = betterproto.string_field(13) + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 14, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 15, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(16) + patch_path: str = betterproto.string_field(17) + deferred: bool = betterproto.bool_field(18) + unrendered_config: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: str = betterproto.string_field(20) + raw_code: str = betterproto.string_field(21) + language: str = betterproto.string_field(22) + refs: List["ListOfStrings"] = betterproto.message_field(23) + sources: List["ListOfStrings"] = betterproto.message_field(24) + metrics: List["ListOfStrings"] = betterproto.message_field(25) + depends_on: "DependsOn" = betterproto.message_field(26) + compiled: bool = betterproto.bool_field(27) + compiled_code: str = betterproto.string_field(28) + + +@dataclass(eq=False, repr=False) +class Macro(betterproto.Message): + """Macro - BaseNode plus additional attributes""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + macro_sql: str = betterproto.string_field(7) + depends_on: "MacroDependsOn" = betterproto.message_field(8) + description: str = betterproto.string_field(9) + meta: Dict[str, str] = betterproto.map_field( + 10, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + docs: "Docs" = betterproto.message_field(11) + patch_path: Optional[str] = betterproto.string_field(12, optional=True, group="_patch_path") + arguments: List["MacroArgument"] = betterproto.message_field(13) + supported_languages: List[str] = betterproto.string_field(14) + + +@dataclass(eq=False, repr=False) +class Documentation(betterproto.Message): + """Documentation - BaseNode plus block_contents""" + + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + block_contents: str = betterproto.string_field(7) + + +@dataclass(eq=False, repr=False) +class SourceDefinition(betterproto.Message): + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + database: Optional[str] = betterproto.string_field(8, optional=True, group="_database") + schema: str = betterproto.string_field(9) + source_name: str = betterproto.string_field(10) + source_description: str = betterproto.string_field(11) + loader: str = betterproto.string_field(12) + identifier: str = betterproto.string_field(13) + quoting: "Quoting" = betterproto.message_field(14) + loaded_at_field: Optional[str] = betterproto.string_field( + 15, optional=True, group="_loaded_at_field" + ) + freshness: Optional["FreshnessThreshold"] = betterproto.message_field( + 16, optional=True, group="_freshness" + ) + description: str = betterproto.string_field(18) + """optional ExternalTable external = 17;""" + + columns: Dict[str, "ColumnInfo"] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE + ) + meta: Dict[str, str] = betterproto.map_field( + 20, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + source_meta: Dict[str, str] = betterproto.map_field( + 21, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + tags: List[str] = betterproto.string_field(22) + config: "SourceConfig" = betterproto.message_field(23) + patch_path: Optional[str] = betterproto.string_field(24, optional=True, group="_patch_path") + unrendered_config: Dict[str, str] = betterproto.map_field( + 25, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + relation_name: Optional[str] = betterproto.string_field( + 26, optional=True, group="_relation_name" + ) + + +@dataclass(eq=False, repr=False) +class Exposure(betterproto.Message): + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + type: str = betterproto.string_field(8) + owner: "ExposureOwner" = betterproto.message_field(9) + description: str = betterproto.string_field(10) + label: Optional[str] = betterproto.string_field(11, optional=True, group="_label") + maturity: Optional[str] = betterproto.string_field(12, optional=True, group="_maturity") + meta: Dict[str, str] = betterproto.map_field( + 13, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + tags: List[str] = betterproto.string_field(14) + config: "ExposureConfig" = betterproto.message_field(15) + unrendered_config: Dict[str, str] = betterproto.map_field( + 16, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + url: Optional[str] = betterproto.string_field(17, optional=True, group="_url") + depends_on: "DependsOn" = betterproto.message_field(18) + refs: List["ListOfStrings"] = betterproto.message_field(24) + sources: List["ListOfStrings"] = betterproto.message_field(25) + metrics: List["ListOfStrings"] = betterproto.message_field(26) + + +@dataclass(eq=False, repr=False) +class Metric(betterproto.Message): + name: str = betterproto.string_field(1) + resource_type: str = betterproto.string_field(2) + package_name: str = betterproto.string_field(3) + path: str = betterproto.string_field(4) + original_file_path: str = betterproto.string_field(5) + unique_id: str = betterproto.string_field(6) + fqn: List[str] = betterproto.string_field(7) + description: str = betterproto.string_field(8) + label: str = betterproto.string_field(9) + calculation_method: str = betterproto.string_field(10) + expression: str = betterproto.string_field(11) + filters: List["MetricFilter"] = betterproto.message_field(12) + time_grains: List[str] = betterproto.string_field(13) + dimensions: List[str] = betterproto.string_field(14) + timestamp: Optional[str] = betterproto.string_field(15, optional=True, group="_timestamp") + window: Optional["MetricTime"] = betterproto.message_field(16, optional=True, group="_window") + model: Optional[str] = betterproto.string_field(17, optional=True, group="_model") + model_unique_id: Optional[str] = betterproto.string_field( + 18, optional=True, group="_model_unique_id" + ) + meta: Dict[str, str] = betterproto.map_field( + 19, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + tags: List[str] = betterproto.string_field(20) + config: "MetricConfig" = betterproto.message_field(21) + unrendered_config: Dict[str, str] = betterproto.map_field( + 22, betterproto.TYPE_STRING, betterproto.TYPE_STRING + ) + refs: List["ListOfStrings"] = betterproto.message_field(24) + sources: List["ListOfStrings"] = betterproto.message_field(25) + metrics: List["ListOfStrings"] = betterproto.message_field(26) + depends_on: "DependsOn" = betterproto.message_field(27) diff --git a/core/dbt/contracts/graph/unparsed.py b/core/dbt/contracts/graph/unparsed.py index 6521e644542..3cbd53b763d 100644 --- a/core/dbt/contracts/graph/unparsed.py +++ b/core/dbt/contracts/graph/unparsed.py @@ -8,6 +8,7 @@ Replaceable, rename_metric_attr, ) +from dbt.contracts.graph import proto_nodes # trigger the PathEncoder import dbt.helper_types # noqa:F401 @@ -86,6 +87,12 @@ class Docs(dbtClassMixin, Replaceable): show: bool = True node_color: Optional[str] = None + def to_msg(self): + return proto_nodes.Docs( + show=self.show, + node_color=self.node_color, + ) + @dataclass class HasDocs(AdditionalPropertiesMixin, ExtensibleDbtClassMixin, Replaceable): @@ -157,6 +164,11 @@ class MacroArgument(dbtClassMixin): type: Optional[str] = None description: str = "" + def to_msg(self): + return proto_nodes.MacroArgument( + name=self.name, type=self.type, description=self.description + ) + @dataclass class UnparsedMacroUpdate(HasConfig, HasDocs, HasYamlMetadata): @@ -177,6 +189,12 @@ class Time(dbtClassMixin, Mergeable): count: Optional[int] = None period: Optional[TimePeriod] = None + def to_msg(self): + return proto_nodes.Time( + count=self.count, + period=(None if not self.period else self.period.value), + ) + def exceeded(self, actual_age: float) -> bool: if self.period is None or self.count is None: return False @@ -194,6 +212,14 @@ class FreshnessThreshold(dbtClassMixin, Mergeable): error_after: Optional[Time] = field(default_factory=Time) filter: Optional[str] = None + def to_msg(self): + msg = proto_nodes.FreshnessThreshold(filter=self.filter) + if self.warn_after: + msg.warn_after = self.warn_after.to_msg() + if self.error_after: + msg.error_after = self.error_after.to_msg() + return msg + def status(self, age: float) -> "dbt.contracts.results.FreshnessStatus": from dbt.contracts.results import FreshnessStatus @@ -244,6 +270,14 @@ class Quoting(dbtClassMixin, Mergeable): identifier: Optional[bool] = None column: Optional[bool] = None + def to_msg(self): + return proto_nodes.Quoting( + database=self.database, + schema=self.schema, + identifier=self.identifier, + column=self.column, + ) + @dataclass class UnparsedSourceTableDefinition(HasColumnTests, HasTests): @@ -428,6 +462,12 @@ class ExposureOwner(dbtClassMixin, Replaceable): email: str name: Optional[str] = None + def to_msg(self): + return proto_nodes.ExposureOwner( + email=self.email, + name=self.name, + ) + @dataclass class UnparsedExposure(dbtClassMixin, Replaceable): @@ -459,6 +499,9 @@ class MetricFilter(dbtClassMixin, Replaceable): # TODO : Can we make this Any? value: str + def to_msg(self): + return proto_nodes.MetricFilter(field=self.field, operator=self.operator, value=self.value) + class MetricTimePeriod(StrEnum): day = "day" @@ -475,6 +518,11 @@ class MetricTime(dbtClassMixin, Mergeable): count: Optional[int] = None period: Optional[MetricTimePeriod] = None + def to_msg(self): + return proto_nodes.MetricTime( + count=self.count, period=(self.period.value if self.period else None) + ) + def __bool__(self): return self.count is not None and self.period is not None diff --git a/core/dbt/contracts/graph/utils.py b/core/dbt/contracts/graph/utils.py index c1003b2d9d9..931b1cdbe04 100644 --- a/core/dbt/contracts/graph/utils.py +++ b/core/dbt/contracts/graph/utils.py @@ -1,4 +1,23 @@ import re +import enum +from dbt.utils import cast_dict_to_dict_of_strings + + +def get_msg_attribute_value(obj, attribute): + orig_value = getattr(obj, attribute) + value = orig_value + if isinstance(orig_value, enum.Enum): + value = orig_value.value + elif hasattr(value, "to_msg"): + value = value.to_msg() + elif attribute == "columns": + value = {} + for k, v in orig_value: + value[k] = orig_value.to_msg() + elif isinstance(orig_value, dict): + value = cast_dict_to_dict_of_strings(value) + return value + HTML_COLORS = [ "aliceblue", diff --git a/core/dbt/events/README.md b/core/dbt/events/README.md index 52edd7d35d4..28ff5005c3f 100644 --- a/core/dbt/events/README.md +++ b/core/dbt/events/README.md @@ -51,3 +51,7 @@ logger = AdapterLogger("") ## Compiling types.proto After adding a new message in types.proto, in the core/dbt/events directory: ```protoc --python_betterproto_out . types.proto``` + +After switching to the 2.0.0b5 release of betterproto, there is now a bug where it generates incorrectly named python classes when the names have acronyms like SQL or YAML in them. For now, I'm renaming these. The bug has been fixed in the repo, so hopefully there will be a new release at some point. (SQL, YAML, GET get turned int Sql, Yaml, Get). + +In addition, betterproto now puts the generated file in proto_types/__init__.py. I'm moving it to proto_types.py diff --git a/core/dbt/events/types.proto b/core/dbt/events/types.proto index 5e595621104..3ef0d752fd4 100644 --- a/core/dbt/events/types.proto +++ b/core/dbt/events/types.proto @@ -1579,10 +1579,8 @@ message LogFreshnessResultMsg { LogFreshnessResult data = 2; } - // Skipped Q019, Q020, Q021 - // Q022 message LogCancelLine { string conn_name = 1; diff --git a/core/dbt/parser/manifest.py b/core/dbt/parser/manifest.py index 6385fb6ec72..6b915cec1ee 100644 --- a/core/dbt/parser/manifest.py +++ b/core/dbt/parser/manifest.py @@ -49,6 +49,7 @@ from dbt.contracts.files import FileHash, ParseFileType, SchemaSourceFile from dbt.parser.read_files import read_files, load_source_file from dbt.parser.partial import PartialParsing, special_override_macros +from dbt.constants import MANIFEST_FILE_NAME from dbt.contracts.graph.manifest import ( Manifest, Disabled, @@ -85,7 +86,6 @@ from dbt.dataclass_schema import StrEnum, dbtClassMixin -MANIFEST_FILE_NAME = "manifest.json" PARTIAL_PARSE_FILE_NAME = "partial_parse.msgpack" PARSING_STATE = DbtProcessState("parsing") PERF_INFO_FILE_NAME = "perf_info.json" @@ -408,6 +408,9 @@ def load(self): # write out the fully parsed manifest self.write_manifest_for_partial_parse() + # write out manifest.json + parsed_manifest_path = os.path.join(self.root_project.target_path, MANIFEST_FILE_NAME) + self.manifest.write(parsed_manifest_path) return self.manifest @@ -583,7 +586,8 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]: ) fire_event( Note( - msg=f"previous checksum: {self.manifest.state_check.vars_hash.checksum}, current checksum: {manifest.state_check.vars_hash.checksum}" + msg=f"previous checksum: {manifest.state_check.vars_hash.checksum}, " + f"current checksum: {self.manifest.state_check.vars_hash.checksum}" ), level=EventLevel.DEBUG, ) diff --git a/core/dbt/task/parse.py b/core/dbt/task/parse.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/core/dbt/utils.py b/core/dbt/utils.py index 27309c4b373..51ac8c3693b 100644 --- a/core/dbt/utils.py +++ b/core/dbt/utils.py @@ -694,3 +694,10 @@ def cast_dict_to_dict_of_strings(dct): for k, v in dct.items(): new_dct[str(k)] = str(v) return new_dct + + +def cast_to_bool(bool_value: Optional[bool]) -> bool: + if bool_value is None: + return False + else: + return True diff --git a/core/setup.py b/core/setup.py index f48fa4f3237..eed56e059b8 100644 --- a/core/setup.py +++ b/core/setup.py @@ -48,7 +48,7 @@ install_requires=[ "Jinja2==3.1.2", "agate>=1.6,<1.7.1", - "betterproto==1.2.5", + "betterproto==2.0.0b5", "click>=7.0,<9", "colorama>=0.3.9,<0.4.7", "hologram>=0.0.14,<=0.0.15", diff --git a/dev-requirements.txt b/dev-requirements.txt index ada2d734b61..936d40c4a5f 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,4 +1,4 @@ -betterproto[compiler]==1.2.5 +betterproto[compiler]==2.0.0b5 black==22.10.0 bumpversion docutils diff --git a/tests/unit/test_proto_nodes.py b/tests/unit/test_proto_nodes.py new file mode 100644 index 00000000000..ce5d9e33e62 --- /dev/null +++ b/tests/unit/test_proto_nodes.py @@ -0,0 +1,378 @@ +from dbt.contracts.graph.nodes import ( + ModelNode, + AnalysisNode, + RPCNode, + SqlNode, + HookNode, + SeedNode, + SingularTestNode, + GenericTestNode, + TestMetadata, + SnapshotNode, + Macro, + Documentation, + SourceDefinition, + Exposure, + Metric, +) +from dbt.contracts.graph.model_config import ( + NodeConfig, + SeedConfig, + TestConfig, + SnapshotConfig, + SourceConfig, + ExposureConfig, + MetricConfig, +) +from dbt.contracts.graph.unparsed import ExposureOwner, MetricFilter +from dbt.node_types import NodeType, ModelLanguage +from dbt.contracts.files import FileHash + + +def test_nodes(): + # Create NodeConfig for use in the next 5 nodes + node_config = NodeConfig( + enabled=True, + alias=None, + schema="my_schema", + database="my_database", + tags=["one", "two", "three"], + meta={"one": 1, "two": 2}, + materialized="table", + incremental_strategy=None, + full_refresh=None, + on_schema_change="ignore", + packages=["one", "two", "three"], + ) + # Create a dummy ModelNode + model_node = ModelNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + language="sql", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.Model, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=node_config, + ) + assert model_node + # Get a matching proto message + proto_model_msg = model_node.to_msg() + assert proto_model_msg + + # Create a dummy AnalysisNode + analysis_node = AnalysisNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + language="sql", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.Analysis, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=node_config, + ) + assert analysis_node + # Get a matching proto message + proto_analysis_msg = analysis_node.to_msg() + assert proto_analysis_msg + + # Create a dummy RPCNode + rpc_node = RPCNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + language="sql", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.RPCCall, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=node_config, + ) + assert rpc_node + # Get a matching proto message + proto_rpc_msg = rpc_node.to_msg() + assert proto_rpc_msg + + # Create a dummy SqlNode + sql_node = SqlNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + language="sql", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.SqlOperation, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=node_config, + ) + assert sql_node + # Get a matching proto message + proto_sql_msg = sql_node.to_msg() + assert proto_sql_msg + + # Create a dummy HookNode + hook_node = HookNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="hook.test.my_hook", + raw_code="select 1 from fun", + language="sql", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_hook", + resource_type=NodeType.Operation, + alias="my_hook", + checksum=FileHash.from_contents("select 1 from fun"), + config=node_config, + index=1, + ) + assert hook_node + # Get a matching proto message + proto_hook_msg = hook_node.to_msg() + assert proto_hook_msg + assert proto_hook_msg.index + + # Create a dummy SeedNode + seed_config = SeedConfig( + enabled=True, + alias=None, + schema="my_schema", + database="my_database", + tags=["one", "two", "three"], + meta={"one": 1, "two": 2}, + ) + seed_node = SeedNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.seed.my_node", + raw_code="", + package_name="test", + path="seed.csv", + original_file_path="seeds/seed.csv", + name="seed", + resource_type=NodeType.Seed, + alias="seed", + checksum=FileHash.from_contents("test"), + root_path="some_path", + config=seed_config, + ) + assert seed_node + # Get a matching proto message + proto_seed_msg = seed_node.to_msg() + assert proto_seed_msg + assert proto_seed_msg.root_path + + # config for SingularTestNode and GenericTestNode + test_config = TestConfig( + enabled=True, + alias=None, + schema="my_schema", + database="my_database", + tags=["one", "two", "three"], + meta={"one": 1, "two": 2}, + ) + + # Create a dummy SingularTestNode + singular_test_node = SingularTestNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.Test, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=test_config, + ) + assert singular_test_node + # Get a matching proto message + proto_singular_test_msg = singular_test_node.to_msg() + assert proto_singular_test_msg + + # Create a dummy GenericTestNode + test_metadata = TestMetadata( + name="my_test", + kwargs={"one": 1, "two": "another"}, + ) + generic_test_node = GenericTestNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_node", + raw_code="select 1 from fun", + package_name="test", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_node", + resource_type=NodeType.Test, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=test_config, + test_metadata=test_metadata, + column_name="some_column", + ) + assert generic_test_node + # Get a matching proto message + proto_generic_test_msg = generic_test_node.to_msg() + assert proto_generic_test_msg + assert proto_generic_test_msg.column_name + + # Create SnapshotConfig and SnapshotNode + snapshot_config = SnapshotConfig( + enabled=True, + alias=None, + schema="my_schema", + database="my_database", + tags=["one", "two", "three"], + meta={"one": 1, "two": 2}, + materialized="table", + incremental_strategy=None, + full_refresh=None, + on_schema_change="ignore", + packages=["one", "two", "three"], + strategy="check", + target_schema="some_schema", + target_database="some_database", + check_cols="id", + ) + snapshot_node = SnapshotNode( + database="testdb", + schema="testschema", + fqn=["my", "test"], + unique_id="test.model.my_test", + raw_code="select 1 from fun", + language="sql", + package_name="my_project", + path="my_node.sql", + original_file_path="models/my_node.sql", + name="my_test", + resource_type=NodeType.Snapshot, + alias="my_node", + checksum=FileHash.from_contents("select 1 from fun"), + config=snapshot_config, + ) + assert snapshot_node + # Get a matching proto message + proto_snapshot_msg = snapshot_node.to_msg() + assert proto_snapshot_msg + assert proto_snapshot_msg.config.target_schema + + # Create a dummy Macro + macro = Macro( + name="my_macro", + resource_type=NodeType.Macro, + package_name="my_project", + path="my_macro.sql", + original_file_path="macros/my_macro.sql", + unique_id="macro.my_project.my_macro", + macro_sql="{{ }}", + description="my macro", + supported_languages=[ModelLanguage.sql], + ) + proto_macro_msg = macro.to_msg() + assert proto_macro_msg + assert proto_macro_msg.supported_languages == ["sql"] + + # Create a dummy Documentation + doc = Documentation( + name="my_doc", + resource_type=NodeType.Macro, + package_name="my_project", + path="readme.md", + original_file_path="models/readme.md", + unique_id="doc.my_project.my_doc", + block_contents="this is my special doc", + ) + proto_doc_msg = doc.to_msg() + assert proto_doc_msg + assert proto_doc_msg.block_contents + + # Dummy SourceDefinition + source = SourceDefinition( + name="my_source", + resource_type=NodeType.Source, + package_name="my_project", + path="source.yml", + original_file_path="source.yml", + unique_id="source.my_project.my_source", + fqn=["sources", "my_source"], + database="my_database", + schema="my_schema", + source_name="my_source", + source_description="my source", + loader="loader", + identifier="my_source", + config=SourceConfig(enabled=True), + ) + proto_source_msg = source.to_msg() + assert proto_source_msg + assert proto_source_msg.source_name + + # Dummy Exposure + exposure = Exposure( + name="my_exposure", + resource_type=NodeType.Exposure, + package_name="my_project", + path="exposure.yml", + original_file_path="exposure.yml", + unique_id="exposure.my_project.my_exposure", + fqn=["my", "exposure"], + config=ExposureConfig(enabled=True), + type="dashboard", + owner=ExposureOwner(email="someone@somewhere"), + description="my exposure", + ) + proto_exposure_msg = exposure.to_msg() + assert proto_exposure_msg + assert proto_exposure_msg.type + + # Dummy Metric + metric = Metric( + name="my_metric", + resource_type=NodeType.Metric, + package_name="my_project", + path="metrics.yml", + original_file_path="metrics.yml", + unique_id="metric.my_project.my_metric", + fqn=["my", "metric"], + config=MetricConfig(enabled=True), + description="my metric", + label="my label", + calculation_method="*", + expression="select 1 as fun", + filters=[MetricFilter(field="sum", operator="sum", value="sum")], + time_grains=["day", "minute"], + dimensions=["day", "minute"], + ) + proto_metric_msg = metric.to_msg() + assert proto_metric_msg + assert proto_metric_msg.label