Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

merge stable to release 1.1 #5300

Merged
merged 7 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions backend/infrahub/core/attribute.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,21 @@ class Integer(BaseAttribute):
value: int
from_pool: Optional[str] = None

@classmethod
def validate_format(cls, value: Any, name: str, schema: AttributeSchema) -> None:
"""
Make sure boolean objects are not accepted as value. Need to override `validate_format`
as `isinstance(True, int)` is True.
"""

value_to_check = value
if schema.enum and isinstance(value, Enum):
value_to_check = value.value

# Note that we might want to do this check directly in parent function.
if value_to_check.__class__ != cls.type:
raise ValidationError({name: f"{value} is not a valid {schema.kind}"})


class IntegerOptional(Integer):
value: Optional[int]
Expand Down
7 changes: 4 additions & 3 deletions backend/infrahub/core/diff/calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ async def calculate_diff(
schema_manager=registry.schema,
from_time=from_time,
to_time=to_time,
previous_node_field_specifiers=previous_node_specifiers,
)
branch_diff_query = await DiffAllPathsQuery.init(
db=self.db,
Expand All @@ -45,9 +46,9 @@ async def calculate_diff(
diff_parser.read_result(query_result=query_result)

if base_branch.name != diff_branch.name:
branch_node_specifiers = diff_parser.get_node_field_specifiers_for_branch(branch_name=diff_branch.name)
new_node_field_specifiers = branch_node_specifiers - (previous_node_specifiers or set())
current_node_field_specifiers = (previous_node_specifiers or set()) - new_node_field_specifiers
new_node_field_specifiers = diff_parser.get_new_node_field_specifiers()
current_node_field_specifiers = diff_parser.get_current_node_field_specifiers()

base_diff_query = await DiffAllPathsQuery.init(
db=self.db,
branch=base_branch,
Expand Down
59 changes: 32 additions & 27 deletions backend/infrahub/core/diff/coordinator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Iterable

from infrahub import lock
from infrahub.core import registry
Expand All @@ -14,7 +14,6 @@
EnrichedDiffs,
NameTrackingId,
NodeFieldSpecifier,
TimeRange,
TrackingId,
)

Expand Down Expand Up @@ -219,6 +218,30 @@ async def recalculate(
log.debug(f"Diff recalculation complete for {base_branch.name} - {diff_branch.name}")
return enriched_diffs.diff_branch_diff

def _get_ordered_diff_pairs(
self, diff_pairs: Iterable[EnrichedDiffs], allow_overlap: bool = False
) -> list[EnrichedDiffs]:
ordered_diffs = sorted(diff_pairs, key=lambda d: d.diff_branch_diff.from_time)
if allow_overlap:
return ordered_diffs
ordered_diffs_no_overlaps: list[EnrichedDiffs] = []
for candidate_diff_pair in ordered_diffs:
if not ordered_diffs_no_overlaps:
ordered_diffs_no_overlaps.append(candidate_diff_pair)
continue
# no time overlap
previous_diff = ordered_diffs_no_overlaps[-1].diff_branch_diff
candidate_diff = candidate_diff_pair.diff_branch_diff
if previous_diff.to_time <= candidate_diff.from_time:
ordered_diffs_no_overlaps.append(candidate_diff_pair)
continue
previous_interval = previous_diff.time_range
candidate_interval = candidate_diff.time_range
# keep the diff that covers the larger time frame
if candidate_interval > previous_interval:
ordered_diffs_no_overlaps[-1] = candidate_diff_pair
return ordered_diffs_no_overlaps

async def _update_diffs(
self,
base_branch: Branch,
Expand Down Expand Up @@ -272,15 +295,17 @@ async def _get_aggregated_enriched_diffs(
if not partial_enriched_diffs:
return await self._get_enriched_diff(diff_request=diff_request, is_incremental_diff=False)

remaining_diffs = sorted(partial_enriched_diffs, key=lambda d: d.diff_branch_diff.from_time)
ordered_diffs = self._get_ordered_diff_pairs(diff_pairs=partial_enriched_diffs, allow_overlap=False)
ordered_diff_reprs = [repr(d) for d in ordered_diffs]
log.debug(f"Ordered diffs for aggregation: {ordered_diff_reprs}")
current_time = diff_request.from_time
previous_diffs: EnrichedDiffs | None = None
while current_time < diff_request.to_time:
if remaining_diffs and remaining_diffs[0].diff_branch_diff.from_time == current_time:
current_diffs = remaining_diffs.pop(0)
if ordered_diffs and ordered_diffs[0].diff_branch_diff.from_time == current_time:
current_diffs = ordered_diffs.pop(0)
else:
if remaining_diffs:
end_time = remaining_diffs[0].diff_branch_diff.from_time
if ordered_diffs:
end_time = ordered_diffs[0].diff_branch_diff.from_time
else:
end_time = diff_request.to_time
if previous_diffs is None:
Expand Down Expand Up @@ -326,26 +351,6 @@ async def _get_enriched_diff(self, diff_request: EnrichedDiffRequest, is_increme
enriched_diff_pair = await self.diff_enricher.enrich(calculated_diffs=calculated_diff_pair)
return enriched_diff_pair

def _get_missing_time_ranges(
self, time_ranges: list[TimeRange], from_time: Timestamp, to_time: Timestamp
) -> list[TimeRange]:
if not time_ranges:
return [TimeRange(from_time=from_time, to_time=to_time)]
sorted_time_ranges = sorted(time_ranges, key=lambda tr: tr.from_time)
missing_time_ranges = []
if sorted_time_ranges[0].from_time > from_time:
missing_time_ranges.append(TimeRange(from_time=from_time, to_time=sorted_time_ranges[0].from_time))
index = 0
while index < len(sorted_time_ranges) - 1:
this_diff = sorted_time_ranges[index]
next_diff = sorted_time_ranges[index + 1]
if this_diff.to_time < next_diff.from_time:
missing_time_ranges.append(TimeRange(from_time=this_diff.to_time, to_time=next_diff.from_time))
index += 1
if sorted_time_ranges[-1].to_time < to_time:
missing_time_ranges.append(TimeRange(from_time=sorted_time_ranges[-1].to_time, to_time=to_time))
return missing_time_ranges

def _get_node_field_specifiers(self, enriched_diff: EnrichedDiffRoot) -> set[NodeFieldSpecifier]:
specifiers: set[NodeFieldSpecifier] = set()
schema_branch = registry.schema.get_schema_branch(name=enriched_diff.diff_branch_name)
Expand Down
16 changes: 16 additions & 0 deletions backend/infrahub/core/diff/model/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from neo4j.graph import Node as Neo4jNode
from neo4j.graph import Path as Neo4jPath
from neo4j.graph import Relationship as Neo4jRelationship
from pendulum import Interval

from infrahub.graphql.initialization import GraphqlContext

Expand Down Expand Up @@ -415,6 +416,10 @@ class EnrichedDiffRoot(BaseSummary):
def __hash__(self) -> int:
return hash(self.uuid)

@property
def time_range(self) -> Interval:
return self.to_time.obj - self.from_time.obj

def get_nodes_without_parents(self) -> set[EnrichedDiffNode]:
nodes_with_parent_uuids = set()
for n in self.nodes:
Expand Down Expand Up @@ -504,6 +509,17 @@ class EnrichedDiffs:
base_branch_diff: EnrichedDiffRoot
diff_branch_diff: EnrichedDiffRoot

def __repr__(self) -> str:
return (
f"{self.__class__.__name__}("
f"branch_uuid={self.diff_branch_diff.uuid},"
f"base_uuid={self.base_branch_diff.uuid},"
f"branch_name={self.diff_branch_name},"
f"base_name={self.base_branch_name},"
f"from_time={self.diff_branch_diff.from_time},"
f"to_time={self.diff_branch_diff.to_time})"
)

@classmethod
def from_calculated_diffs(cls, calculated_diffs: CalculatedDiffs) -> EnrichedDiffs:
base_branch_diff = EnrichedDiffRoot.from_calculated_diff(
Expand Down
70 changes: 51 additions & 19 deletions backend/infrahub/core/diff/query_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,20 +153,21 @@ def to_diff_property(self, from_time: Timestamp) -> DiffProperty:
class DiffAttributeIntermediate(TrackedStatusUpdates):
uuid: str
name: str
from_time: Timestamp
properties_by_type: dict[DatabaseEdgeType, DiffPropertyIntermediate] = field(default_factory=dict)

def track_database_path(self, database_path: DatabasePath) -> None:
if database_path.attribute_changed_at in self.timestamp_status_map:
return
self.timestamp_status_map[database_path.attribute_changed_at] = database_path.attribute_status

def to_diff_attribute(self, from_time: Timestamp, include_unchanged: bool) -> DiffAttribute:
def to_diff_attribute(self, include_unchanged: bool) -> DiffAttribute:
properties = []
for prop in self.properties_by_type.values():
diff_prop = prop.to_diff_property(from_time=from_time)
diff_prop = prop.to_diff_property(from_time=self.from_time)
if include_unchanged or diff_prop.action is not DiffAction.UNCHANGED:
properties.append(diff_prop)
action, changed_at = self.get_action_and_timestamp(from_time=from_time)
action, changed_at = self.get_action_and_timestamp(from_time=self.from_time)
if not properties or all(p.action is DiffAction.UNCHANGED for p in properties):
action = DiffAction.UNCHANGED
return DiffAttribute(
Expand Down Expand Up @@ -308,6 +309,7 @@ class DiffRelationshipIntermediate:
name: str
identifier: str
cardinality: RelationshipCardinality
from_time: Timestamp
properties_by_db_id: dict[str, set[DiffRelationshipPropertyIntermediate]] = field(default_factory=dict)
_single_relationship_list: list[DiffSingleRelationshipIntermediate] = field(default_factory=list)

Expand Down Expand Up @@ -354,23 +356,22 @@ def _index_relationships(self) -> None:
for single_relationship_properties in self.properties_by_db_id.values()
]

def to_diff_relationship(self, from_time: Timestamp, include_unchanged: bool) -> DiffRelationship:
def to_diff_relationship(self, include_unchanged: bool) -> DiffRelationship:
self._index_relationships()
single_relationships = [
sr.get_final_single_relationship(from_time=from_time, include_unchanged=include_unchanged)
sr.get_final_single_relationship(from_time=self.from_time, include_unchanged=include_unchanged)
for sr in self._single_relationship_list
]
last_changed_relationship = max(single_relationships, key=lambda r: r.changed_at)
last_changed_at = last_changed_relationship.changed_at
action = DiffAction.UPDATED
if last_changed_at < from_time or all(sr.action is DiffAction.UNCHANGED for sr in single_relationships):
if last_changed_at < self.from_time or all(sr.action is DiffAction.UNCHANGED for sr in single_relationships):
action = DiffAction.UNCHANGED
if (
self.cardinality is RelationshipCardinality.ONE
and len(single_relationships) == 1
and single_relationships[0].action in (DiffAction.ADDED, DiffAction.REMOVED)
):
action = single_relationships[0].action
# bubble up action, excluding UNCHANGED
if self.cardinality is RelationshipCardinality.ONE:
actions = {sr.action for sr in single_relationships if sr.action is not DiffAction.UNCHANGED}
if len(actions) == 1:
action = actions.pop()
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ignore UNCHANGED individual relationships when determining the overall action for this relationship group (kind of equivalent to RelationshipManager)

return DiffRelationship(
name=self.name,
changed_at=last_changed_at,
Expand All @@ -391,13 +392,13 @@ class DiffNodeIntermediate(TrackedStatusUpdates):
def to_diff_node(self, from_time: Timestamp, include_unchanged: bool) -> DiffNode:
attributes = []
for attr in self.attributes_by_name.values():
diff_attr = attr.to_diff_attribute(from_time=from_time, include_unchanged=include_unchanged)
diff_attr = attr.to_diff_attribute(include_unchanged=include_unchanged)
if include_unchanged or diff_attr.action is not DiffAction.UNCHANGED:
attributes.append(diff_attr)
action, changed_at = self.get_action_and_timestamp(from_time=from_time)
relationships = []
for rel in self.relationships_by_name.values():
diff_rel = rel.to_diff_relationship(from_time=from_time, include_unchanged=include_unchanged)
diff_rel = rel.to_diff_relationship(include_unchanged=include_unchanged)
if include_unchanged or diff_rel.action is not DiffAction.UNCHANGED:
relationships.append(diff_rel)
if not attributes and not relationships:
Expand Down Expand Up @@ -447,6 +448,7 @@ def __init__(
schema_manager: SchemaManager,
from_time: Timestamp,
to_time: Optional[Timestamp] = None,
previous_node_field_specifiers: set[NodeFieldSpecifier] | None = None,
) -> None:
self.base_branch_name = base_branch.name
self.diff_branch_name = diff_branch.name
Expand All @@ -460,6 +462,7 @@ def __init__(
self.diff_branched_from_time = Timestamp(diff_branch.get_branched_from())
self._diff_root_by_branch: dict[str, DiffRootIntermediate] = {}
self._final_diff_root_by_branch: dict[str, DiffRoot] = {}
self._previous_node_field_specifiers = previous_node_field_specifiers or set()

def get_branches(self) -> set[str]:
return set(self._final_diff_root_by_branch.keys())
Expand All @@ -471,11 +474,11 @@ def get_diff_root_for_branch(self, branch: str) -> DiffRoot:
return self._final_diff_root_by_branch[branch]
return DiffRoot(from_time=self.from_time, to_time=self.to_time, uuid=str(uuid4()), branch=branch, nodes=[])

def get_node_field_specifiers_for_branch(self, branch_name: str) -> set[NodeFieldSpecifier]:
if branch_name not in self._diff_root_by_branch:
def get_diff_node_field_specifiers(self) -> set[NodeFieldSpecifier]:
if self.diff_branch_name not in self._diff_root_by_branch:
return set()
node_field_specifiers: set[NodeFieldSpecifier] = set()
diff_root = self._diff_root_by_branch[branch_name]
diff_root = self._diff_root_by_branch[self.diff_branch_name]
for node in diff_root.nodes_by_id.values():
node_field_specifiers.update(
NodeFieldSpecifier(node_uuid=node.uuid, field_name=attribute_name)
Expand All @@ -487,6 +490,16 @@ def get_node_field_specifiers_for_branch(self, branch_name: str) -> set[NodeFiel
)
return node_field_specifiers

def get_new_node_field_specifiers(self) -> set[NodeFieldSpecifier]:
branch_node_specifiers = self.get_diff_node_field_specifiers()
new_node_field_specifiers = branch_node_specifiers - self._previous_node_field_specifiers
return new_node_field_specifiers

def get_current_node_field_specifiers(self) -> set[NodeFieldSpecifier]:
new_node_field_specifiers = self.get_new_node_field_specifiers()
current_node_field_specifiers = self._previous_node_field_specifiers - new_node_field_specifiers
return current_node_field_specifiers
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to add some caching for these b/c we should really only need to calculate these things one time, but that can come in a later PR


def read_result(self, query_result: QueryResult) -> None:
path = query_result.get_path(label="diff_path")
database_path = DatabasePath.from_cypher_path(cypher_path=path)
Expand Down Expand Up @@ -546,7 +559,9 @@ def _update_attribute_level(self, database_path: DatabasePath, diff_node: DiffNo
relationship_schema = self._get_relationship_schema(database_path=database_path, node_schema=node_schema)
if not relationship_schema:
return
diff_relationship = self._get_diff_relationship(diff_node=diff_node, relationship_schema=relationship_schema)
diff_relationship = self._get_diff_relationship(
diff_node=diff_node, relationship_schema=relationship_schema, database_path=database_path
)
diff_relationship.add_path(
database_path=database_path, diff_from_time=self.from_time, diff_to_time=self.to_time
)
Expand All @@ -555,10 +570,16 @@ def _get_diff_attribute(
self, database_path: DatabasePath, diff_node: DiffNodeIntermediate
) -> DiffAttributeIntermediate:
attribute_name = database_path.attribute_name
node_field_specifier = NodeFieldSpecifier(node_uuid=diff_node.uuid, field_name=attribute_name)
branch_name = database_path.deepest_branch
from_time = self.from_time
if branch_name == self.base_branch_name and node_field_specifier in self.get_new_node_field_specifiers():
from_time = self.diff_branched_from_time
if attribute_name not in diff_node.attributes_by_name:
diff_node.attributes_by_name[attribute_name] = DiffAttributeIntermediate(
uuid=database_path.attribute_id,
name=attribute_name,
from_time=from_time,
)
diff_attribute = diff_node.attributes_by_name[attribute_name]
diff_attribute.track_database_path(database_path=database_path)
Expand All @@ -583,14 +604,25 @@ def _update_attribute_property(
)

def _get_diff_relationship(
self, diff_node: DiffNodeIntermediate, relationship_schema: RelationshipSchema
self,
diff_node: DiffNodeIntermediate,
relationship_schema: RelationshipSchema,
database_path: DatabasePath,
) -> DiffRelationshipIntermediate:
diff_relationship = diff_node.relationships_by_name.get(relationship_schema.name)
if not diff_relationship:
node_field_specifier = NodeFieldSpecifier(
node_uuid=diff_node.uuid, field_name=relationship_schema.get_identifier()
)
branch_name = database_path.deepest_branch
from_time = self.from_time
if branch_name == self.base_branch_name and node_field_specifier in self.get_new_node_field_specifiers():
from_time = self.diff_branched_from_time
diff_relationship = DiffRelationshipIntermediate(
name=relationship_schema.name,
cardinality=relationship_schema.cardinality,
identifier=relationship_schema.get_identifier(),
from_time=from_time,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we use the from_time here and above in _get_diff_attribute when determining the action (ADDED/UPDATED/REMOVED) for a given database path. when we are getting nodes on the base branch (always main for now), we retrieve both completely new nodes that are not yet part of this diff and updated nodes that are already part of this diff. we need to use different from_times when determining their actions and this is how we accomplish that

)
diff_node.relationships_by_name[relationship_schema.name] = diff_relationship
return diff_relationship
Expand Down
Loading
Loading