Skip to content

Commit

Permalink
fix(lineage) Filter dataset -> dataset lineage edges if data is trans…
Browse files Browse the repository at this point in the history
…formed
  • Loading branch information
Chris Collins authored and Chris Collins committed Apr 27, 2022
1 parent 40cb83f commit 15f200d
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 11 deletions.
32 changes: 28 additions & 4 deletions datahub-web-react/src/Mocks.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import { GetGlossaryTermDocument, GetGlossaryTermQuery } from './graphql/glossar
import { GetEntityCountsDocument } from './graphql/app.generated';
import { GetMeDocument } from './graphql/me.generated';
import { ListRecommendationsDocument } from './graphql/recommendations.generated';
import { FetchedEntity } from './app/lineage/types';

const user1 = {
username: 'sdas',
Expand Down Expand Up @@ -115,7 +116,7 @@ const dataPlatform = {
},
};

const dataset1 = {
export const dataset1 = {
urn: 'urn:li:dataset:1',
type: EntityType.Dataset,
platform: {
Expand Down Expand Up @@ -208,7 +209,7 @@ const dataset1 = {
deprecation: null,
};

const dataset2 = {
export const dataset2 = {
urn: 'urn:li:dataset:2',
type: EntityType.Dataset,
platform: {
Expand All @@ -231,6 +232,7 @@ const dataset2 = {
name: 'Some Other Dataset',
description: 'This is some other dataset, so who cares!',
customProperties: [],
origin: 'PROD',
},
editableProperties: null,
created: {
Expand Down Expand Up @@ -1055,8 +1057,8 @@ export const dataJob1 = {
editableProperties: null,
inputOutput: {
__typename: 'DataJobInputOutput',
inputDatasets: [dataset3],
outputDatasets: [dataset3],
inputDatasets: [dataset5],
outputDatasets: [dataset6],
inputDatajobs: [],
},
globalTags: {
Expand Down Expand Up @@ -1298,6 +1300,28 @@ export const mlModel = {
deprecation: null,
} as MlModel;

export const dataset1FetchedEntity = {
urn: dataset1.urn,
name: dataset1.name,
type: dataset1.type,
upstreamChildren: [],
downstreamChildren: [
{ type: EntityType.Dataset, entity: dataset2 },
{ type: EntityType.DataJob, entity: dataJob1 },
],
} as FetchedEntity;

export const dataset2FetchedEntity = {
urn: dataset2.urn,
name: 'test name',
type: dataset2.type,
upstreamChildren: [
{ type: EntityType.Dataset, entity: dataset1 },
{ type: EntityType.DataJob, entity: dataJob1 },
],
downstreamChildren: [],
} as FetchedEntity;

export const mlModelGroup = {
__typename: 'MLModelGroup',
urn: 'urn:li:mlModelGroup:(urn:li:dataPlatform:sagemaker,another-group,PROD)',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import { dataset1, dataset2, dataJob1, dataset1FetchedEntity, dataset2FetchedEntity } from '../../../Mocks';
import { EntityType } from '../../../types.generated';
import { Direction, EntityAndType, FetchedEntity } from '../types';
import { shouldIncludeChildEntity } from '../utils/constructFetchedNode';

describe('shouldIncludeChildEntity', () => {
const parentChildren = [
{ entity: dataset1, type: dataset1.type },
{ entity: dataJob1, type: dataJob1.type },
] as EntityAndType[];

it('should return false if parent and child are datasets and the child has a datajob child that belongs to the parent children', () => {
const shouldBeIncluded = shouldIncludeChildEntity(
Direction.Upstream,
parentChildren,
dataset1FetchedEntity,
dataset2FetchedEntity,
);

expect(shouldBeIncluded).toBe(false);
});

it('should return true if the datajob is not a child of the parent', () => {
const parentChild = [{ entity: dataset1, type: dataset1.type }] as EntityAndType[];
const shouldBeIncluded = shouldIncludeChildEntity(
Direction.Upstream,
parentChild,
dataset1FetchedEntity,
dataset2FetchedEntity,
);

expect(shouldBeIncluded).toBe(true);
});

it('should return true if either parent or child is not a dataset', () => {
const fetchedDatajobEntity = { ...dataset1FetchedEntity, type: EntityType.DataJob };
let shouldBeIncluded = shouldIncludeChildEntity(
Direction.Upstream,
parentChildren,
fetchedDatajobEntity,
dataset2FetchedEntity,
);
expect(shouldBeIncluded).toBe(true);

const fetchedDashboardEntity = { ...dataset2FetchedEntity, type: EntityType.Dashboard };
shouldBeIncluded = shouldIncludeChildEntity(
Direction.Upstream,
parentChildren,
dataset1FetchedEntity,
fetchedDashboardEntity,
);
expect(shouldBeIncluded).toBe(true);
});

it('should return true if the parent has a datajob child that is not a child of the dataset child', () => {
const updatedDataset1FetchedEntity = {
...dataset1FetchedEntity,
downstreamChildren: [{ type: EntityType.Dataset, entity: dataset2 }],
} as FetchedEntity;

const shouldBeIncluded = shouldIncludeChildEntity(
Direction.Upstream,
parentChildren,
updatedDataset1FetchedEntity,
dataset2FetchedEntity,
);

expect(shouldBeIncluded).toBe(true);
});
});
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import {
dataJob1,
dataset3,
dataset3WithLineage,
dataset4,
Expand All @@ -7,7 +8,7 @@ import {
dataset5WithLineage,
dataset6WithLineage,
} from '../../../Mocks';
import { EntityType } from '../../../types.generated';
import { EntityType, RelationshipDirection } from '../../../types.generated';
import { getTestEntityRegistry } from '../../../utils/test-utils/TestPageContainer';
import { Direction, FetchedEntities } from '../types';
import constructTree from '../utils/constructTree';
Expand Down Expand Up @@ -299,4 +300,89 @@ describe('constructTree', () => {
],
});
});

it('should not include a Dataset as a child if that Dataset has a Datajob child which points to the parent', () => {
// dataset6 is downstream of dataset5 and datajob1, datajob 1 is downstream of dataset 5
const updatedDataset6WithLineage = {
...dataset6WithLineage,
downstream: null,
upstream: {
start: 0,
count: 2,
total: 2,
relationships: [
{
type: 'DownstreamOf',
direction: RelationshipDirection.Incoming,
entity: dataset5,
},
{
type: 'DownstreamOf',
direction: RelationshipDirection.Incoming,
entity: dataJob1,
},
],
},
};
const updatedDataset5WithLineage = {
...dataset5WithLineage,
downstream: {
...dataset5WithLineage.downstream,
relationships: [
...dataset5WithLineage.downstream.relationships,
{
type: 'DownstreamOf',
direction: RelationshipDirection.Outgoing,
entity: dataJob1,
},
],
},
};
const fetchedEntities = [
{ entity: updatedDataset5WithLineage, direction: Direction.Upstream, fullyFetched: true },
{ entity: dataJob1, direction: Direction.Upstream, fullyFetched: true },
];
const mockFetchedEntities = fetchedEntities.reduce(
(acc, entry) =>
extendAsyncEntities(
acc,
testEntityRegistry,
{ entity: entry.entity, type: entry.entity.type },
entry.fullyFetched,
),
{} as FetchedEntities,
);
expect(
constructTree(
{ entity: updatedDataset6WithLineage, type: EntityType.Dataset },
mockFetchedEntities,
Direction.Upstream,
testEntityRegistry,
),
).toEqual({
name: 'Display Name of Sixth',
expandedName: 'Fully Qualified Name of Sixth Test Dataset',
urn: 'urn:li:dataset:6',
type: EntityType.Dataset,
unexploredChildren: 0,
icon: undefined,
platform: 'Kafka',
subtype: undefined,
children: [
{
name: 'DataJobInfoName',
expandedName: undefined,
type: EntityType.DataJob,
unexploredChildren: 0,
urn: dataJob1.urn,
children: [],
countercurrentChildrenUrns: [],
icon: '',
status: null,
platform: 'Airflow',
subtype: undefined,
},
],
});
});
});
39 changes: 34 additions & 5 deletions datahub-web-react/src/app/lineage/utils/constructFetchedNode.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
import { Direction, FetchedEntities, NodeData } from '../types';
import { EntityType } from '../../../types.generated';
import { Direction, EntityAndType, FetchedEntities, FetchedEntity, NodeData } from '../types';

// If there are nodes A, B, C and A -> B, B -> C, A -> C, where A and C are Datasets and B is a DataJob, we don't want to show edge A -> C
export function shouldIncludeChildEntity(
direction: Direction,
parentChildren?: EntityAndType[],
childEntity?: FetchedEntity | null,
parentEntity?: FetchedEntity,
) {
if (
parentEntity?.type === EntityType.Dataset &&
childEntity?.type === EntityType.Dataset &&
childEntity &&
parentChildren
) {
// we want the children of this child entity in the opposite direction of the parent to see if we connect back to the parent
const childrenKey = direction === Direction.Upstream ? 'downstreamChildren' : 'upstreamChildren';
return !childEntity[childrenKey]?.some(
(child) =>
child.type === EntityType.DataJob && parentChildren.some((c) => c.entity.urn === child.entity.urn),
);
}
return true;
}

export default function constructFetchedNode(
urn: string,
Expand All @@ -18,6 +42,8 @@ export default function constructFetchedNode(
return constructedNodes[urn];
}

const childrenKey = direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren';

if (fetchedNode && !constructedNodes[urn]) {
const node: NodeData = {
name: fetchedNode.name,
Expand All @@ -27,9 +53,7 @@ export default function constructFetchedNode(
subtype: fetchedNode.subtype,
icon: fetchedNode.icon,
unexploredChildren:
fetchedNode?.[direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren']?.filter(
(childUrn) => !(childUrn.entity.urn in fetchedEntities),
).length || 0,
fetchedNode?.[childrenKey]?.filter((childUrn) => !(childUrn.entity.urn in fetchedEntities)).length || 0,
countercurrentChildrenUrns:
fetchedNode?.[direction === Direction.Downstream ? 'upstreamChildren' : 'downstreamChildren']?.map(
(child) => child.entity.urn,
Expand All @@ -43,7 +67,7 @@ export default function constructFetchedNode(
constructedNodes[urn] = node;

node.children =
(fetchedNode?.[direction === Direction.Upstream ? 'upstreamChildren' : 'downstreamChildren']
(fetchedNode?.[childrenKey]
?.map((child) => {
if (child.entity.urn === node.urn) {
return null;
Expand All @@ -56,6 +80,11 @@ export default function constructFetchedNode(
newConstructionPath,
);
})
?.filter((child) => {
const childEntity = fetchedEntities[child?.urn || ''];
const parentChildren = fetchedNode[childrenKey];
return shouldIncludeChildEntity(direction, parentChildren, childEntity, fetchedNode);
})
.filter(Boolean) as Array<NodeData>) || [];

return node;
Expand Down
6 changes: 5 additions & 1 deletion datahub-web-react/src/app/lineage/utils/constructTree.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import EntityRegistry from '../../entity/EntityRegistry';
import { Direction, EntityAndType, FetchedEntities, NodeData } from '../types';
import constructFetchedNode from './constructFetchedNode';
import constructFetchedNode, { shouldIncludeChildEntity } from './constructFetchedNode';

export default function constructTree(
entityAndType: EntityAndType | null | undefined,
Expand Down Expand Up @@ -41,6 +41,10 @@ export default function constructTree(
root.urn || '',
]);
})
?.filter((child) => {
const childEntity = fetchedEntities[child?.urn || ''];
return shouldIncludeChildEntity(direction, children, childEntity, fetchedEntity);
})
?.filter(Boolean) as Array<NodeData>;
return root;
}

0 comments on commit 15f200d

Please sign in to comment.