Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make fixture files full-fledged members of the manifest and enable partial parsing #9225

Merged
merged 17 commits into from
Dec 7, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Features-20231205-200447.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Unreleased changelog entry (Features) for the fixture-file manifest change.
kind: Features
body: Make fixture files full-fledged parts of the manifest and enable partial parsing
time: 2023-12-05T20:04:47.117029-05:00
custom:
  # Entry metadata nested under `custom`: contributor and issue number.
  Author: gshank
  Issue: "9067"
17 changes: 15 additions & 2 deletions core/dbt/contracts/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
Documentation = "docs"
Schema = "schema"
Hook = "hook" # not a real filetype, from dbt_project.yml
Fixture = "fixture"


parse_file_type_to_parser = {
Expand All @@ -35,6 +36,7 @@
ParseFileType.Documentation: "DocumentationParser",
ParseFileType.Schema: "SchemaParser",
ParseFileType.Hook: "HookParser",
ParseFileType.Fixture: "FixtureParser",
}


Expand Down Expand Up @@ -152,7 +154,6 @@
parse_file_type: Optional[ParseFileType] = None
# we don't want to serialize this
contents: Optional[str] = None
# the unique IDs contained in this file

@property
def file_id(self):
Expand All @@ -168,6 +169,8 @@
def _deserialize(cls, dct: Dict[str, int]):
    """Rebuild the source-file object selected by ``dct["parse_file_type"]``.

    A "schema" entry becomes a ``SchemaSourceFile``, a "fixture" entry a
    ``FixtureSourceFile``, and any other value a plain ``SourceFile``; each
    is built with that class's ``from_dict``.
    """
    file_type = dct["parse_file_type"]
    if file_type == "schema":
        return SchemaSourceFile.from_dict(dct)
    if file_type == "fixture":
        return FixtureSourceFile.from_dict(dct)
    return SourceFile.from_dict(dct)
Expand Down Expand Up @@ -328,4 +331,14 @@
del self.env_vars[yaml_key]


AnySourceFile = Union[SchemaSourceFile, SourceFile]
@dataclass
class FixtureSourceFile(BaseSourceFile):
    """Source-file record for a fixture file.

    Holds the id stored in ``fixture`` and a duplicate-free list of values
    registered through ``add_unit_test``.
    """

    # id of the fixture associated with this file; None until one is set
    fixture: Optional[str] = None
    # values recorded via add_unit_test (presumably unit test unique ids --
    # TODO confirm against callers)
    unit_tests: List[str] = field(default_factory=list)

    def add_unit_test(self, value):
        """Append ``value`` to ``unit_tests`` unless it is already present."""
        if value in self.unit_tests:
            return
        self.unit_tests.append(value)

Check warning on line 341 in core/dbt/contracts/files.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/contracts/files.py#L340-L341

Added lines #L340 - L341 were not covered by tests


AnySourceFile = Union[SchemaSourceFile, SourceFile, FixtureSourceFile]
20 changes: 19 additions & 1 deletion core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,17 @@
SourceDefinition,
UnpatchedSourceDefinition,
UnitTestDefinition,
UnitTestFileFixture,
)
from dbt.contracts.graph.unparsed import SourcePatch, NodeVersion, UnparsedVersion
from dbt.contracts.graph.manifest_upgrade import upgrade_manifest_json
from dbt.contracts.files import SourceFile, SchemaSourceFile, FileHash, AnySourceFile
from dbt.contracts.files import (
SourceFile,
SchemaSourceFile,
FileHash,
AnySourceFile,
FixtureSourceFile,
)
from dbt.contracts.util import (
BaseArtifactMetadata,
SourceKey,
Expand Down Expand Up @@ -802,6 +809,7 @@
semantic_models: MutableMapping[str, SemanticModel] = field(default_factory=dict)
unit_tests: MutableMapping[str, UnitTestDefinition] = field(default_factory=dict)
saved_queries: MutableMapping[str, SavedQuery] = field(default_factory=dict)
fixtures: MutableMapping[str, UnitTestFileFixture] = field(default_factory=dict)

_doc_lookup: Optional[DocLookup] = field(
default=None, metadata={"serialize": lambda x: None, "deserialize": lambda x: None}
Expand Down Expand Up @@ -1444,6 +1452,8 @@
source_file.exposures.append(node.unique_id)
if isinstance(node, Group):
source_file.groups.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass

Check warning on line 1456 in core/dbt/contracts/graph/manifest.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/contracts/graph/manifest.py#L1456

Added line #L1456 was not covered by tests
else:
source_file.nodes.append(node.unique_id)

Expand Down Expand Up @@ -1486,6 +1496,8 @@
source_file.semantic_models.append(node.unique_id)
if isinstance(node, Exposure):
source_file.exposures.append(node.unique_id)
elif isinstance(source_file, FixtureSourceFile):
pass

Check warning on line 1500 in core/dbt/contracts/graph/manifest.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/contracts/graph/manifest.py#L1500

Added line #L1500 was not covered by tests
else:
source_file.nodes.append(node.unique_id)

Expand All @@ -1505,6 +1517,12 @@
self.unit_tests[unit_test.unique_id] = unit_test
source_file.unit_tests.append(unit_test.unique_id)

def add_fixture(self, source_file: FixtureSourceFile, fixture: UnitTestFileFixture):
    """Register ``fixture`` in ``self.fixtures`` and link it to its file.

    Raises:
        DuplicateResourceNameError: if a fixture with the same unique id
            is already registered.
    """
    fixture_id = fixture.unique_id
    if fixture_id in self.fixtures:
        raise DuplicateResourceNameError(fixture, self.fixtures[fixture_id])

    self.fixtures[fixture_id] = fixture
    # record on the source file which fixture it produced
    source_file.fixture = fixture_id

def add_saved_query(self, source_file: SchemaSourceFile, saved_query: SavedQuery) -> None:
_check_duplicates(saved_query, self.saved_queries)
self.saved_queries[saved_query.unique_id] = saved_query
Expand Down
10 changes: 8 additions & 2 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1116,14 +1116,14 @@ def tags(self) -> List[str]:
tags = self.config.tags
return [tags] if isinstance(tags, str) else tags

def build_unit_test_checksum(self, project_root: str, fixture_paths: List[str]):
def build_unit_test_checksum(self):
# everything except 'description'
data = f"{self.model}-{self.given}-{self.expect}-{self.overrides}"

# include underlying fixture data
for input in self.given:
if input.fixture:
data += f"-{input.get_rows(project_root, fixture_paths)}"
data += f"-{input.rows}"

self.checksum = hashlib.new("sha256", data.encode("utf-8")).hexdigest()

Expand All @@ -1134,6 +1134,12 @@ def same_contents(self, other: Optional["UnitTestDefinition"]) -> bool:
return self.checksum == other.checksum


@dataclass
class UnitTestFileFixture(BaseNode):
    """Node of type ``NodeType.Fixture`` carrying a fixture's rows."""

    resource_type: Literal[NodeType.Fixture]
    # one dict per fixture row; defaults to None when no rows are supplied
    rows: Optional[List[Dict[str, Any]]] = None


# ====================================
# Snapshot node
# ====================================
Expand Down
67 changes: 2 additions & 65 deletions core/dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import datetime
import re
import csv
from io import StringIO

from dbt import deprecations
from dbt.clients.system import find_matching
from dbt.node_types import NodeType
from dbt.contracts.graph.semantic_models import (
Defaults,
Expand Down Expand Up @@ -769,76 +766,16 @@ class UnitTestFormat(StrEnum):
Dict = "dict"


class UnitTestFixture:
@property
def format(self) -> UnitTestFormat:
return UnitTestFormat.Dict

@property
def rows(self) -> Optional[Union[str, List[Dict[str, Any]]]]:
return None

@property
def fixture(self) -> Optional[str]:
return None

def get_rows(self, project_root: str, paths: List[str]) -> List[Dict[str, Any]]:
if self.format == UnitTestFormat.Dict:
assert isinstance(self.rows, List)
return self.rows
elif self.format == UnitTestFormat.CSV:
rows = []
if self.fixture is not None:
assert isinstance(self.fixture, str)
file_path = self.get_fixture_path(self.fixture, project_root, paths)
with open(file_path, newline="") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
rows.append(row)
else: # using inline csv
assert isinstance(self.rows, str)
dummy_file = StringIO(self.rows)
reader = csv.DictReader(dummy_file)
rows = []
for row in reader:
rows.append(row)
return rows

def get_fixture_path(self, fixture: str, project_root: str, paths: List[str]) -> str:
fixture_path = f"{fixture}.csv"
matches = find_matching(project_root, paths, fixture_path)
if len(matches) == 0:
raise ParsingError(f"Could not find fixture file {fixture} for unit test")
elif len(matches) > 1:
raise ParsingError(
f"Found multiple fixture files named {fixture} at {[d['relative_path'] for d in matches]}. Please use a unique name for each fixture file."
)

return matches[0]["absolute_path"]

def validate_fixture(self, fixture_type, test_name) -> None:
if self.format == UnitTestFormat.Dict and not isinstance(self.rows, list):
raise ParsingError(
f"Unit test {test_name} has {fixture_type} rows which do not match format {self.format}"
)
if self.format == UnitTestFormat.CSV and not (
isinstance(self.rows, str) or isinstance(self.fixture, str)
):
raise ParsingError(
f"Unit test {test_name} has {fixture_type} rows or fixtures which do not match format {self.format}. Expected string."
)


@dataclass
class UnitTestInputFixture(dbtClassMixin, UnitTestFixture):
class UnitTestInputFixture(dbtClassMixin):
input: str
rows: Optional[Union[str, List[Dict[str, Any]]]] = None
format: UnitTestFormat = UnitTestFormat.Dict
fixture: Optional[str] = None


@dataclass
class UnitTestOutputFixture(dbtClassMixin, UnitTestFixture):
class UnitTestOutputFixture(dbtClassMixin):
rows: Optional[Union[str, List[Dict[str, Any]]]] = None
format: UnitTestFormat = UnitTestFormat.Dict
fixture: Optional[str] = None
Expand Down
30 changes: 15 additions & 15 deletions core/dbt/graph/selector_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,21 +727,21 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu

manifest: WritableManifest = self.previous_state.manifest

for node, real_node in self.all_nodes(included_nodes):
for unique_id, node in self.all_nodes(included_nodes):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thank you for renaming this here -- this has been bugging me forever!

previous_node: Optional[SelectorTarget] = None

if node in manifest.nodes:
previous_node = manifest.nodes[node]
elif node in manifest.sources:
previous_node = manifest.sources[node]
elif node in manifest.exposures:
previous_node = manifest.exposures[node]
elif node in manifest.metrics:
previous_node = manifest.metrics[node]
elif node in manifest.semantic_models:
previous_node = manifest.semantic_models[node]
elif node in manifest.unit_tests:
previous_node = manifest.unit_tests[node]
if unique_id in manifest.nodes:
previous_node = manifest.nodes[unique_id]
elif unique_id in manifest.sources:
previous_node = manifest.sources[unique_id]
elif unique_id in manifest.exposures:
previous_node = manifest.exposures[unique_id]
elif unique_id in manifest.metrics:
previous_node = manifest.metrics[unique_id]
elif unique_id in manifest.semantic_models:
previous_node = manifest.semantic_models[unique_id]
elif unique_id in manifest.unit_tests:
previous_node = manifest.unit_tests[unique_id]

keyword_args = {}
if checker.__name__ in [
Expand All @@ -751,8 +751,8 @@ def search(self, included_nodes: Set[UniqueId], selector: str) -> Iterator[Uniqu
]:
keyword_args["adapter_type"] = adapter_type # type: ignore

if checker(previous_node, real_node, **keyword_args): # type: ignore
yield node
if checker(previous_node, node, **keyword_args): # type: ignore
yield unique_id


class ResultSelectorMethod(SelectorMethod):
Expand Down
1 change: 1 addition & 0 deletions core/dbt/node_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class NodeType(StrEnum):
SavedQuery = "saved_query"
SemanticModel = "semantic_model"
Unit = "unit_test"
Fixture = "fixture"

@classmethod
def executable(cls) -> List["NodeType"]:
Expand Down
46 changes: 46 additions & 0 deletions core/dbt/parser/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from typing import Optional, Dict, List, Any
from io import StringIO
import csv

from dbt.contracts.files import FixtureSourceFile
from dbt.contracts.graph.nodes import UnitTestFileFixture
from dbt.node_types import NodeType
from dbt.parser.base import Parser
from dbt.parser.search import FileBlock


class FixtureParser(Parser[UnitTestFileFixture]):
@property
def resource_type(self) -> NodeType:
    """Node type handled by this parser: always ``NodeType.Fixture``."""
    return NodeType.Fixture

@classmethod
def get_compiled_path(cls, block: FileBlock):
    """Return the relative path of the file behind ``block``."""
    # Is this necessary?
    relative_path = block.path.relative_path
    return relative_path

Check warning on line 20 in core/dbt/parser/fixtures.py

View check run for this annotation

Codecov / codecov/patch

core/dbt/parser/fixtures.py#L20

Added line #L20 was not covered by tests

def generate_unique_id(self, resource_name: str, _: Optional[str] = None) -> str:
    """Build the unique id ``fixture.<project name>.<resource name>``.

    The project name is read from ``self.project.project_name``; the second
    positional argument is accepted but not used.
    """
    return f"fixture.{self.project.project_name}.{resource_name}"

def parse_file(self, file_block: FileBlock):
    """Build the fixture node for ``file_block`` and add it to the manifest.

    The node's rows come from parsing the file's contents with
    ``get_rows``; the finished node is registered through
    ``self.manifest.add_fixture`` together with its source file.
    """
    source_file = file_block.file
    # fixtures are only expected to arrive as FixtureSourceFile objects
    assert isinstance(source_file, FixtureSourceFile)

    unique_id = self.generate_unique_id(file_block.name)
    rows = self.get_rows(source_file.contents)

    fixture = UnitTestFileFixture(
        name=file_block.name,
        path=source_file.path.relative_path,
        original_file_path=file_block.path.original_file_path,
        package_name=self.project.project_name,
        unique_id=unique_id,
        resource_type=NodeType.Fixture,
        rows=rows,
    )
    self.manifest.add_fixture(source_file, fixture)

def get_rows(self, contents: Optional[str]) -> List[Dict[str, Any]]:
    """Parse CSV text into one dict per data row, keyed by the header row.

    Args:
        contents: CSV text whose first row supplies the column names.
            ``None`` or an empty string produces no rows.

    Returns:
        The rows in file order, exactly as ``csv.DictReader`` yields them
        (no type conversion is applied to the values).
    """
    # DictReader takes its field names from the first row; list() drains
    # the reader instead of copying row by row in a manual loop.
    return list(csv.DictReader(StringIO(contents)))
2 changes: 2 additions & 0 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
from dbt.parser.generic_test import GenericTestParser
from dbt.parser.singular_test import SingularTestParser
from dbt.parser.docs import DocumentationParser
from dbt.parser.fixtures import FixtureParser
from dbt.parser.hooks import HookParser
from dbt.parser.macros import MacroParser
from dbt.parser.models import ModelParser
Expand Down Expand Up @@ -471,6 +472,7 @@ def load(self) -> Manifest:
SeedParser,
DocumentationParser,
HookParser,
FixtureParser,
]
for project in self.all_projects.values():
if project.project_name not in project_parser_files:
Expand Down
Loading