Skip to content

Commit

Permalink
Add documentation for macros/analyses
Browse files Browse the repository at this point in the history
Refactored parsing a bit to support the idea of:
 - macro patches (no columns)
 - node patches that don't get tests (analyses)
  • Loading branch information
Jacob Beck committed Jan 27, 2020
1 parent 4e23e7d commit abab3c7
Show file tree
Hide file tree
Showing 15 changed files with 597 additions and 138 deletions.
47 changes: 39 additions & 8 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
import os
from dataclasses import dataclass, field
from datetime import datetime
from typing import Dict, List, Optional, Union, Mapping, Any
from typing import (
Dict, List, Optional, Union, Mapping, Any, Tuple, MutableMapping
)
from uuid import UUID

from hologram import JsonSchemaMixin

from dbt.contracts.graph.parsed import ParsedNode, ParsedMacro, \
ParsedDocumentation
from dbt.contracts.graph.parsed import (
ParsedNode, ParsedMacro, ParsedDocumentation, ParsedNodePatch,
ParsedMacroPatch, ParsedSourceDefinition
)
from dbt.contracts.graph.compiled import CompileResultNode
from dbt.contracts.util import Writable, Replaceable
from dbt.exceptions import (
Expand All @@ -24,6 +28,7 @@
import dbt.utils

NodeEdgeMap = Dict[str, List[str]]
MacroKey = Tuple[str, str]


@dataclass
Expand Down Expand Up @@ -130,6 +135,8 @@ class SourceFile(JsonSchemaMixin):
sources: List[str] = field(default_factory=list)
# any node patches in this file. The entries are names, not unique ids!
patches: List[str] = field(default_factory=list)
# any macro patches in this file. The entries are package, name pairs.
macro_patches: List[MacroKey] = field(default_factory=list)

@property
def search_key(self) -> Optional[str]:
Expand Down Expand Up @@ -336,7 +343,9 @@ def _find_by_name(self, name, package, subgraph, nodetype):
package,
nodetype)

def find_docs_by_name(self, name, package=None):
def find_docs_by_name(
self, name: str, package: Optional[str] = None
) -> Optional[ParsedDocumentation]:
for unique_id, doc in self.docs.items():
parts = unique_id.split('.')
if len(parts) != 2:
Expand Down Expand Up @@ -430,7 +439,29 @@ def add_nodes(self, new_nodes):
raise_duplicate_resource_name(node, self.nodes[unique_id])
self.nodes[unique_id] = node

def patch_nodes(self, patches):
def patch_macros(
    self, patches: MutableMapping[MacroKey, ParsedMacroPatch]
) -> None:
    """Patch macros with the given mapping of (package, name) -> patch.

    Note that this consumes the input: every matched patch is popped
    from ``patches``. Anything left over could not be matched to a
    macro and is reported at debug level.
    """
    for macro in self.macros.values():
        key = (macro.package_name, macro.name)
        patch = patches.pop(key, None)
        if patch is None:
            continue
        macro.patch(patch)

    # Anything still in `patches` had no matching macro. Since patches
    # aren't nodes, we can't use the existing target_not_found warning;
    # log a debug-level warning instead. (Iterating an empty mapping is
    # a no-op, so no emptiness guard is needed.)
    for patch in patches.values():
        logger.debug(
            'WARNING: Found documentation for macro "{}" which was '
            'not found or is disabled'.format(patch.name)
        )

def patch_nodes(
self, patches: MutableMapping[str, ParsedNodePatch]
) -> None:
"""Patch nodes with the given dict of patches. Note that this consumes
the input!
This relies on the fact that all nodes have unique _name_ fields, not
Expand All @@ -443,12 +474,12 @@ def patch_nodes(self, patches):
for node in self.nodes.values():
if node.resource_type == NodeType.Source:
continue
# appease mypy
assert not isinstance(node, ParsedSourceDefinition)
patch = patches.pop(node.name, None)
if not patch:
continue
expected_key = node.resource_type.pluralize()
if expected_key == patch.yaml_key:
node.patch(patch)
if expected_key != patch.yaml_key:
if patch.yaml_key == 'models':
deprecations.warn(
Expand Down Expand Up @@ -477,7 +508,7 @@ def patch_nodes(self, patches):
# since patches aren't nodes, we can't use the existing
# target_not_found warning
logger.debug((
'WARNING: Found documentation for model "{}" which was '
'WARNING: Found documentation for resource "{}" which was '
'not found or is disabled').format(patch.name)
)

Expand Down
32 changes: 27 additions & 5 deletions core/dbt/contracts/graph/parsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,18 +450,27 @@ def json_schema(cls, embeddable=False):
return schema


@dataclass
class ParsedPatch(HasYamlMetadata, Replaceable):
    """Fields shared by all schema-file ('patch') entries: documentation
    and metadata applied to an already-parsed node or macro.
    """
    name: str
    description: str
    docrefs: List[Docref]
    meta: Dict[str, Any]


# The parsed node update is only the 'patch', not the test. The test became a
# regular parsed node. Note that description and columns must be present, but
# may be empty.
@dataclass
class ParsedNodePatch(ParsedPatch):
    # Per-column documentation; only node patches carry columns.
    columns: Dict[str, ColumnInfo]


# Macro patches have no columns, so nothing is added beyond the base.
@dataclass
class ParsedMacroPatch(ParsedPatch):
    pass


@dataclass
class MacroDependsOn(JsonSchemaMixin, Replaceable):
macros: List[str] = field(default_factory=list)
Expand All @@ -475,6 +484,10 @@ class ParsedMacro(UnparsedMacro, HasUniqueID):
tags: List[str] = field(default_factory=list)
# TODO: is this ever populated?
depends_on: MacroDependsOn = field(default_factory=MacroDependsOn)
docrefs: List[Docref] = field(default_factory=list)
description: str = field(default='')
meta: Dict[str, Any] = field(default_factory=dict)
patch_path: Optional[str] = None

def local_vars(self):
return {}
Expand All @@ -486,6 +499,15 @@ def generator(self) -> Callable[[Dict[str, Any]], Callable]:
"""
return MacroGenerator(self)

def patch(self, patch: ParsedMacroPatch):
    """Apply a schema-file patch to this macro in place.

    Copies the patch's description, docrefs, and meta onto the macro and
    records which schema file the documentation came from. In strict
    mode the patched macro is re-validated against its JSON schema.
    """
    # Remember which schema file documented this macro.
    self.patch_path: Optional[str] = patch.original_file_path
    self.description = patch.description
    self.docrefs = patch.docrefs
    self.meta = patch.meta
    if dbt.flags.STRICT_MODE:
        # appease mypy: to_dict comes from JsonSchemaMixin
        assert isinstance(self, JsonSchemaMixin)
        self.to_dict(validate=True)


@dataclass
class ParsedDocumentation(UnparsedDocumentationFile, HasUniqueID):
Expand Down
43 changes: 28 additions & 15 deletions core/dbt/contracts/graph/unparsed.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from dataclasses import dataclass, field
from datetime import timedelta
from typing import Optional, List, Union, Dict, Any
from typing import Optional, List, Union, Dict, Any, Sequence


@dataclass
Expand Down Expand Up @@ -55,31 +55,38 @@ class UnparsedRunHook(UnparsedNode):


@dataclass
class HasDocs(JsonSchemaMixin, Replaceable):
    """A schema-file entry that can carry documentation but no tests
    (e.g. macros and analyses)."""
    name: str
    description: str = ''
    meta: Dict[str, Any] = field(default_factory=dict)
    data_type: Optional[str] = None


# A test is either an inline dict config or a bare test name string.
TestDef = Union[Dict[str, Any], str]


@dataclass
class HasTests(HasDocs):
    """A documentable schema-file entry that may also declare tests."""
    tests: Optional[List[TestDef]] = None

    def __post_init__(self):
        # Normalize a missing tests entry to an empty list.
        if self.tests is None:
            self.tests = []

@dataclass
class UnparsedColumn(HasTests):
    """A single documented (and possibly tested) column."""
    tags: List[str] = field(default_factory=list)


@dataclass
class HasColumnDocs(JsonSchemaMixin, Replaceable):
    """An entry whose columns carry documentation only."""
    columns: Sequence[HasDocs] = field(default_factory=list)


@dataclass
class HasColumnTests(HasColumnDocs):
    """An entry whose columns may also declare tests; narrows the
    column type accordingly."""
    columns: Sequence[UnparsedColumn] = field(default_factory=list)


@dataclass
Expand All @@ -90,9 +97,18 @@ class HasYamlMetadata(JsonSchemaMixin):


@dataclass
class UnparsedAnalysisUpdate(HasColumnDocs, HasDocs, HasYamlMetadata):
    """Schema-file entry for an analysis: documentation only, no tests."""
    pass


@dataclass
class UnparsedNodeUpdate(HasColumnTests, HasTests, HasYamlMetadata):
    """Schema-file entry for a node: documentation plus tests."""
    pass


@dataclass
class UnparsedMacroUpdate(HasDocs, HasYamlMetadata):
    """Schema-file entry for a macro: documentation only, no columns."""
    pass


class TimePeriod(StrEnum):
Expand Down Expand Up @@ -201,7 +217,7 @@ class Quoting(JsonSchemaMixin, Mergeable):


@dataclass
class UnparsedSourceTableDefinition(ColumnDescription, NodeDescription):
class UnparsedSourceTableDefinition(HasColumnTests, HasTests):
loaded_at_field: Optional[str] = None
identifier: Optional[str] = None
quoting: Quoting = field(default_factory=Quoting)
Expand All @@ -213,9 +229,6 @@ class UnparsedSourceTableDefinition(ColumnDescription, NodeDescription):
)
tags: List[str] = field(default_factory=list)

def __post_init__(self):
NodeDescription.__post_init__(self)


@dataclass
class UnparsedSourceDefinition(JsonSchemaMixin, Replaceable):
Expand Down
35 changes: 23 additions & 12 deletions core/dbt/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import builtins
import functools
from typing import NoReturn
from typing import NoReturn, Optional

from dbt.logger import GLOBAL_LOGGER as logger
from dbt.node_types import NodeType
Expand Down Expand Up @@ -425,7 +425,9 @@ def doc_invalid_args(model, args):
model)


def doc_target_not_found(model, target_doc_name, target_doc_package):
def doc_target_not_found(
model, target_doc_name: str, target_doc_package: Optional[str]
) -> NoReturn:
target_package_string = ''

if target_doc_package is not None:
Expand Down Expand Up @@ -708,17 +710,26 @@ def raise_patch_targets_not_found(patches):
)


def raise_duplicate_patch_name(patch_1, patch_2):
    """Raise a compiler error for two schema.yml entries describing the
    same resource. The duplicated name is taken from the first patch.
    """
    name = patch_1.name
    raise_compiler_error(
        f'dbt found two schema.yml entries for the same resource named '
        f'{name}. Resources and their associated columns may only be '
        f'described a single time. To fix this, remove the resource entry '
        f'for {name} in one of these files:\n - '
        f'{patch_1.original_file_path}\n - {patch_2.original_file_path}'
    )


def raise_duplicate_macro_patch_name(patch_1, patch_2):
    """Raise a compiler error for two schema.yml entries describing the
    same macro within one package.
    """
    package_name = patch_1.package_name
    name = patch_1.name
    # NOTE: fixed the duplicated word ("for for") in the user-facing message.
    raise_compiler_error(
        f'dbt found two schema.yml entries for the same macro in package '
        f'{package_name} named {name}. Macros may only be described a single '
        f'time. To fix this, remove the macros entry for {name} in one '
        f'of these files:'
        f'\n - {patch_1.original_file_path}\n - {patch_2.original_file_path}'
    )


Expand Down
2 changes: 2 additions & 0 deletions core/dbt/node_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def documentable(cls) -> List['NodeType']:
cls.Seed,
cls.Snapshot,
cls.Source,
cls.Macro,
cls.Analysis,
]

def pluralize(self) -> str:
Expand Down
1 change: 1 addition & 0 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ def create_manifest(self) -> Manifest:
files=self.results.files,
)
manifest.patch_nodes(self.results.patches)
manifest.patch_macros(self.results.macro_patches)
manifest = ParserUtils.process_sources(
manifest, self.root_project.project_name
)
Expand Down
Loading

0 comments on commit abab3c7

Please sign in to comment.