diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index f83d1ff85e15d..d6b1fcdb6dd6e 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -1030,6 +1030,7 @@ jobs: TEST_STRATEGY: ${{ matrix.test_strategy }} run: | echo "$DATAHUB_VERSION" + ./gradlew --stop ./smoke-test/smoke.sh - name: Disk Check run: df -h . && docker images diff --git a/build.gradle b/build.gradle index a83d878f46965..302b37281798f 100644 --- a/build.gradle +++ b/build.gradle @@ -63,7 +63,7 @@ buildscript { buildscript.repositories.addAll(project.repositories) dependencies { classpath 'com.linkedin.pegasus:gradle-plugins:' + pegasusVersion - classpath 'com.github.node-gradle:gradle-node-plugin:7.0.1' + classpath 'com.github.node-gradle:gradle-node-plugin:7.0.2' classpath 'io.acryl.gradle.plugin:gradle-avro-plugin:0.2.0' classpath 'org.springframework.boot:spring-boot-gradle-plugin:' + springBootVersion classpath "io.codearte.gradle.nexus:gradle-nexus-staging-plugin:0.30.0" @@ -267,7 +267,7 @@ project.ext.externalDependency = [ 'testContainersOpenSearch': 'org.opensearch:opensearch-testcontainers:2.0.0', 'typesafeConfig':'com.typesafe:config:1.4.1', 'wiremock':'com.github.tomakehurst:wiremock:2.10.0', - 'zookeeper': 'org.apache.zookeeper:zookeeper:3.6.2', + 'zookeeper': 'org.apache.zookeeper:zookeeper:3.8.4', 'wire': 'com.squareup.wire:wire-compiler:3.7.1', 'charle': 'com.charleskorn.kaml:kaml:0.53.0', 'common': 'commons-io:commons-io:2.7', diff --git a/datahub-frontend/run/frontend.env b/datahub-frontend/run/frontend.env index 2c92febfcfed1..4b32fb64e1008 100644 --- a/datahub-frontend/run/frontend.env +++ b/datahub-frontend/run/frontend.env @@ -44,7 +44,7 @@ ELASTIC_CLIENT_PORT=9200 # AUTH_JAAS_ENABLED=false # Change to disable Metadata Service Authentication -METADATA_SERVICE_AUTH_ENABLED=true +# METADATA_SERVICE_AUTH_ENABLED=false # Change to override max header count defaults DATAHUB_AKKA_MAX_HEADER_COUNT=64 diff 
--git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index d38c1030b61be..5b265b6714452 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1192,13 +1192,15 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher( "updateDescription", new UpdateDescriptionResolver(entityService, this.entityClient)) - .dataFetcher("addOwner", new AddOwnerResolver(entityService)) - .dataFetcher("addOwners", new AddOwnersResolver(entityService)) - .dataFetcher("batchAddOwners", new BatchAddOwnersResolver(entityService)) - .dataFetcher("removeOwner", new RemoveOwnerResolver(entityService)) - .dataFetcher("batchRemoveOwners", new BatchRemoveOwnersResolver(entityService)) + .dataFetcher("addOwner", new AddOwnerResolver(entityService, entityClient)) + .dataFetcher("addOwners", new AddOwnersResolver(entityService, entityClient)) + .dataFetcher( + "batchAddOwners", new BatchAddOwnersResolver(entityService, entityClient)) + .dataFetcher("removeOwner", new RemoveOwnerResolver(entityService, entityClient)) + .dataFetcher( + "batchRemoveOwners", new BatchRemoveOwnersResolver(entityService, entityClient)) .dataFetcher("addLink", new AddLinkResolver(entityService, this.entityClient)) - .dataFetcher("removeLink", new RemoveLinkResolver(entityService)) + .dataFetcher("removeLink", new RemoveLinkResolver(entityService, entityClient)) .dataFetcher("addGroupMembers", new AddGroupMembersResolver(this.groupService)) .dataFetcher("removeGroupMembers", new RemoveGroupMembersResolver(this.groupService)) .dataFetcher("createGroup", new CreateGroupResolver(this.groupService)) @@ -1212,7 +1214,8 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("deleteDomain", 
new DeleteDomainResolver(entityClient)) .dataFetcher( "setDomain", new SetDomainResolver(this.entityClient, this.entityService)) - .dataFetcher("batchSetDomain", new BatchSetDomainResolver(this.entityService)) + .dataFetcher( + "batchSetDomain", new BatchSetDomainResolver(this.entityService, entityClient)) .dataFetcher( "updateDeprecation", new UpdateDeprecationResolver(this.entityClient, this.entityService)) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java index 4847aea224ccd..0fe6e5de0cac6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; import static com.linkedin.metadata.Constants.CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -30,7 +31,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -153,12 +153,11 @@ private SearchResult searchForNewUsers(@Nonnull final OperationContext opContext .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) - .setCondition(Condition.GREATER_THAN) - .setValue( - String.valueOf( - 
trailingMonthDateRange.getStart())))))))), + buildCriterion( + CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME, + Condition.GREATER_THAN, + String.valueOf( + trailingMonthDateRange.getStart())))))))), Collections.singletonList( new SortCriterion() .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index 5f873b4bebab3..b1cd0e9165129 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -2,12 +2,12 @@ import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.datahub.authentication.Authentication; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.AndFilterInput; @@ -154,38 +154,22 @@ public static Filter buildFilter( // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) public static Criterion criterionFromFilter(final FacetFilterInput filter) { - Criterion result = new Criterion(); - result.setField(filter.getField()); - - // `value` is deprecated in place of `values`- this is to support old query patterns. 
If values - // is provided, - // this statement will be skipped - if (filter.getValues() == null && filter.getValue() != null) { - result.setValues(new StringArray(filter.getValue())); - result.setValue(filter.getValue()); - } else if (filter.getValues() != null) { - result.setValues(new StringArray(filter.getValues())); - if (!filter.getValues().isEmpty()) { - result.setValue(filter.getValues().get(0)); - } else { - result.setValue(""); - } - } else { - result.setValues(new StringArray()); - result.setValue(""); - } + final Condition condition; if (filter.getCondition() != null) { - result.setCondition(Condition.valueOf(filter.getCondition().toString())); + condition = Condition.valueOf(filter.getCondition().toString()); } else { - result.setCondition(Condition.EQUAL); + condition = Condition.EQUAL; } - if (filter.getNegated() != null) { - result.setNegated(filter.getNegated()); + final List values; + if (filter.getValues() == null && filter.getValue() != null) { + values = Collections.singletonList(filter.getValue()); + } else { + values = filter.getValues(); } - return result; + return buildCriterion(filter.getField(), condition, filter.getNegated(), values); } public static Filter viewFilter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java index 8372b6b5126a3..4331fe1193fb4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java @@ -1,12 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.auth; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableSet; import com.linkedin.common.EntityRelationship; import 
com.linkedin.common.EntityRelationships; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -19,7 +19,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; @@ -199,41 +198,28 @@ private Filter buildFilterToGetPolicies( ConjunctiveCriterionArray conjunctiveCriteria = new ConjunctiveCriterionArray(); final CriterionArray allUsersAndArray = new CriterionArray(); - allUsersAndArray.add( - new Criterion().setField("allUsers").setValue("true").setCondition(Condition.EQUAL)); + allUsersAndArray.add(buildCriterion("allUsers", Condition.EQUAL, "true")); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(allUsersAndArray)); final CriterionArray allGroupsAndArray = new CriterionArray(); - allGroupsAndArray.add( - new Criterion().setField("allGroups").setValue("true").setCondition(Condition.EQUAL)); + allGroupsAndArray.add(buildCriterion("allGroups", Condition.EQUAL, "true")); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(allGroupsAndArray)); if (user != null && !user.isEmpty()) { final CriterionArray userAndArray = new CriterionArray(); - userAndArray.add( - new Criterion().setField("users").setValue(user).setCondition(Condition.EQUAL)); + userAndArray.add(buildCriterion("users", Condition.EQUAL, user)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(userAndArray)); } if (groups != null && !groups.isEmpty()) { final CriterionArray groupsAndArray = new 
CriterionArray(); - groupsAndArray.add( - new Criterion() - .setField("groups") - .setValue("") - .setValues(new StringArray(groups)) - .setCondition(Condition.EQUAL)); + groupsAndArray.add(buildCriterion("groups", Condition.EQUAL, groups)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(groupsAndArray)); } if (roles != null && !roles.isEmpty()) { final CriterionArray rolesAndArray = new CriterionArray(); - rolesAndArray.add( - new Criterion() - .setField("roles") - .setValue("") - .setValues(new StringArray(roles)) - .setCondition(Condition.EQUAL)); + rolesAndArray.add(buildCriterion("roles", Condition.EQUAL, roles)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(rolesAndArray)); } return new Filter().setOr(conjunctiveCriteria); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java index 5a3207633c07c..82a476ec56ddc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.container; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; @@ -74,10 +75,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro try { final Criterion filterCriterion = - new Criterion() - .setField(CONTAINER_FIELD_NAME + ".keyword") - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(CONTAINER_FIELD_NAME + ".keyword", Condition.EQUAL, urn); return UrnSearchResultsMapper.map( context, diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java index 7e4a9c8a80388..70e62de20b567 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.dashboard; import static com.linkedin.datahub.graphql.resolvers.dashboard.DashboardUsageStatsUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -14,7 +15,6 @@ import com.linkedin.datahub.graphql.types.dashboard.mappers.DashboardUsageMetricMapper; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.EnvelopedAspect; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -102,11 +102,7 @@ private List getDashboardUsageMetrics( final ArrayList criteria = new ArrayList<>(); // Add filter for absence of eventGranularity - only consider absolute stats - Criterion excludeTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - .setCondition(Condition.IS_NULL) - .setValue(""); + Criterion excludeTimeBucketsCriterion = buildIsNullCriterion(ES_FIELD_EVENT_GRANULARITY); criteria.add(excludeTimeBucketsCriterion); filter.setOr( new ConjunctiveCriterionArray( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java index 93c08d37c2e36..9c0b1f450b831 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.dashboard; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; @@ -319,27 +322,22 @@ public static Filter createUsageFilter( final ArrayList criteria = new ArrayList<>(); // Add filter for urn == dashboardUrn - Criterion dashboardUrnCriterion = - new Criterion().setField(ES_FIELD_URN).setCondition(Condition.EQUAL).setValue(dashboardUrn); + Criterion dashboardUrnCriterion = buildCriterion(ES_FIELD_URN, Condition.EQUAL, dashboardUrn); criteria.add(dashboardUrnCriterion); if (startTime != null) { // Add filter for start time Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(Long.toString(startTime)); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, Long.toString(startTime)); criteria.add(startTimeCriterion); } if (endTime != null) { // Add filter for end time Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(Long.toString(endTime)); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.LESS_THAN_OR_EQUAL_TO, Long.toString(endTime)); criteria.add(endTimeCriterion); } @@ -348,18 +346,11 @@ public static Filter createUsageFilter( // stats // since unit is mandatory, we assume if 
eventGranularity contains unit, then it is not null Criterion onlyTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - .setCondition(Condition.CONTAIN) - .setValue("unit"); + buildCriterion(ES_FIELD_EVENT_GRANULARITY, Condition.CONTAIN, "unit"); criteria.add(onlyTimeBucketsCriterion); } else { // Add filter for absence of eventGranularity - only consider absolute stats - Criterion excludeTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - .setCondition(Condition.IS_NULL) - .setValue(""); + Criterion excludeTimeBucketsCriterion = buildIsNullCriterion(ES_FIELD_EVENT_GRANULARITY); criteria.add(excludeTimeBucketsCriterion); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java index f38cf80f36ceb..6db581504f93c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.dataset; +import static com.linkedin.metadata.Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableList; @@ -197,16 +200,12 @@ private Filter createAssertionsFilter(final String datasetUrn) { final ArrayList criteria = new ArrayList<>(); // Add filter for asserteeUrn == datasetUrn - Criterion datasetUrnCriterion = - new Criterion().setField("asserteeUrn").setCondition(Condition.EQUAL).setValue(datasetUrn); + Criterion datasetUrnCriterion = buildCriterion("asserteeUrn", Condition.EQUAL, datasetUrn); criteria.add(datasetUrnCriterion); // Add 
filter for result == result Criterion startTimeCriterion = - new Criterion() - .setField("status") - .setCondition(Condition.EQUAL) - .setValue(Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE); + buildCriterion("status", Condition.EQUAL, ASSERTION_RUN_EVENT_STATUS_COMPLETE); criteria.add(startTimeCriterion); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index c6265380fb2fd..c27fa1d195a76 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -69,10 +70,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro final CriterionArray criteria = new CriterionArray(); final Criterion filterCriterion = - new Criterion() - .setField(DOMAINS_FIELD_NAME + ".keyword") - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(DOMAINS_FIELD_NAME + ".keyword", Condition.EQUAL, urn); criteria.add(filterCriterion); if (input.getFilters() != null) { input diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java index 6ada447ca59ee..adbaae368a418 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java @@ -44,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( - environment.getContext(), entityUrn)) { + environment.getContext(), entityUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java index 783cf250a7ca6..b0bb206a8827b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java @@ -43,7 +43,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( - environment.getContext(), entityUrn)) { + environment.getContext(), entityUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java index 1e99ea120354e..69ad8658b23ba 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java @@ -46,8 +46,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { - final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(urn, context, _entityClient)) { try { final TermRelationshipType relationshipType = input.getRelationshipType(); final List termUrns = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java index 26f0c61de1b0f..c663bd2cf9b9f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java @@ -29,11 +29,10 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final Urn entityUrn = Urn.createFromString(environment.getArgument("urn")); - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(entityUrn, context, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { - if 
(GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(entityUrn, context, _entityClient)) { if (!_entityService.exists(context.getOperationContext(), entityUrn, true)) { throw new RuntimeException(String.format("This urn does not exist: %s", entityUrn)); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java index 451abfdaf1c06..8ec0ecae684a1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -15,7 +16,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -84,11 +84,7 @@ public CompletableFuture get( private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new 
ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java index 7ca79b168819e..9f6808775fff9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -15,7 +16,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -84,11 +84,7 @@ public CompletableFuture get( private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java index 59f820d7cbd36..4e1ffcc00cd89 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java @@ -42,8 +42,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { - final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(urn, context, _entityClient)) { try { final TermRelationshipType relationshipType = input.getRelationshipType(); final List termUrnsToRemove = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java index 380b7c920ab2f..0a8e0e42a0914 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.health; +import static com.linkedin.metadata.Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.EntityRelationships; import com.linkedin.data.template.StringArray; @@ -251,16 +254,12 @@ private Filter createAssertionsFilter(final String datasetUrn) { final ArrayList criteria = new ArrayList<>(); // Add filter for asserteeUrn == datasetUrn - Criterion datasetUrnCriterion = - new 
Criterion().setField("asserteeUrn").setCondition(Condition.EQUAL).setValue(datasetUrn); + Criterion datasetUrnCriterion = buildCriterion("asserteeUrn", Condition.EQUAL, datasetUrn); criteria.add(datasetUrnCriterion); // Add filter for result == result Criterion startTimeCriterion = - new Criterion() - .setField("status") - .setCondition(Condition.EQUAL) - .setValue(Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE); + buildCriterion("status", Condition.EQUAL, ASSERTION_RUN_EVENT_STATUS_COMPLETE); criteria.add(startTimeCriterion); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java index a4c2ab42227d9..8110fe93f8ab7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.ingest.execution; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -62,10 +64,7 @@ public CompletableFuture get( // 1. 
Fetch the related edges final Criterion filterCriterion = - new Criterion() - .setField(INGESTION_SOURCE_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(INGESTION_SOURCE_FIELD_NAME, Condition.EQUAL, urn); final SearchResult executionsSearchResult = _entityClient.filter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java index d7c76c0235dcc..dd470e766378b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.jobs; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; @@ -12,7 +14,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -116,14 +117,10 @@ private Filter buildTaskRunsEntityFilter(final String entityUrn) { CriterionArray array = new CriterionArray( ImmutableList.of( - new Criterion() - .setField(PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(entityUrn), - new Criterion() - .setField(HAS_RUN_EVENTS_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(Boolean.TRUE.toString()))); + buildCriterion( + PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME, Condition.EQUAL, entityUrn), + 
buildCriterion( + HAS_RUN_EVENTS_FIELD_NAME, Condition.EQUAL, Boolean.TRUE.toString()))); final Filter filter = new Filter(); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java index 82c5b73d87152..d71b6f5d01f4f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.jobs; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; @@ -15,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -121,13 +122,12 @@ private Filter buildTaskRunsEntityFilter( CriterionArray array = new CriterionArray( ImmutableList.of( - new Criterion() - .setField( - direction.equals(RelationshipDirection.INCOMING) - ? INPUT_FIELD_NAME - : OUTPUT_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(entityUrn))); + buildCriterion( + direction.equals(RelationshipDirection.INCOMING) + ? 
INPUT_FIELD_NAME + : OUTPUT_FIELD_NAME, + Condition.EQUAL, + entityUrn))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java index 5cffcd9c35c00..c71832b956a7c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java @@ -11,7 +11,6 @@ import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -36,7 +35,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw Urn targetUrn = Urn.createFromString(input.getResourceUrn()); if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn) - && !canUpdateGlossaryEntityLinks(targetUrn, context)) { + && !GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } @@ -70,18 +69,4 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw this.getClass().getSimpleName(), "get"); } - - // Returns whether this is a glossary entity and whether you can edit this glossary entity with - // the - // Manage all children or Manage direct children privileges - private boolean canUpdateGlossaryEntityLinks(Urn targetUrn, QueryContext context) { - final boolean isGlossaryEntity = - targetUrn.getEntityType().equals(Constants.GLOSSARY_TERM_ENTITY_NAME) - || targetUrn.getEntityType().equals(Constants.GLOSSARY_NODE_ENTITY_NAME); - if (!isGlossaryEntity) { - return false; - } - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - return GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient); - } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 7c0f7b3757ee9..db58236a4e615 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -23,6 +24,7 @@ public class AddOwnerResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -41,7 +43,7 
@@ public CompletableFuture get(DataFetchingEnvironment environment) throw } OwnerInput ownerInput = ownerInputBuilder.build(); - OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index ade4e7b744801..329b0abf31149 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -24,6 +25,7 @@ public class AddOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -35,7 +37,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { - OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners( + environment.getContext(), targetUrn, _entityClient); OwnerUtils.validateAddOwnerInput( context.getOperationContext(), owners, targetUrn, _entityService); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 28daef1b11062..3f0f5e0b9edcf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -27,6 +28,7 @@ public class BatchAddOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -80,7 +82,7 @@ private void validateInputResource( "Malformed input provided: owners cannot be applied to subresources."); } - OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn, _entityClient); LabelUtils.validateResource( opContext, resourceUrn, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java index 5aaace4e21e9c..4772b3ef27ac9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java @@ -10,6 +10,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -24,6 +25,7 @@ public class BatchRemoveOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -72,7 +74,7 @@ private void validateInputResource(ResourceRefInput resource, QueryContext conte "Malformed input provided: owners cannot be removed from subresources."); } - OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn, _entityClient); LabelUtils.validateResource( context.getOperationContext(), resourceUrn, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java index abbeed29545e4..3cf8e801d4171 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; +import com.linkedin.entity.client.EntityClient; import 
com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -28,6 +29,7 @@ public class BatchSetDomainResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -74,7 +76,7 @@ private void validateInputResources(List resources, QueryConte private void validateInputResource(ResourceRefInput resource, QueryContext context) { final Urn resourceUrn = UrnUtils.getUrn(resource.getResourceUrn()); - if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity(context, resourceUrn)) { + if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity(context, resourceUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java index e047a24a0adaa..584a0e3e9c2aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java @@ -8,7 +8,9 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveLinkInput; +import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -21,6 +23,7 @@ public class 
RemoveLinkResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -31,7 +34,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw String linkUrl = input.getLinkUrl(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn)) { + if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn) + && !GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java index 8d14884885572..dfb4778644321 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java @@ -10,6 +10,7 @@ import com.linkedin.datahub.graphql.generated.RemoveOwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -22,6 +23,7 @@ public class RemoveOwnerResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -36,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw ? 
null : Urn.createFromString(input.getOwnershipTypeUrn()); - OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java index ad6dbbe635ed1..87aad3f767d95 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java @@ -87,8 +87,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private Boolean updateGlossaryTermName( Urn targetUrn, UpdateNameInput input, QueryContext context) { - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { try { GlossaryTermInfo glossaryTermInfo = (GlossaryTermInfo) @@ -123,8 +122,7 @@ private Boolean updateGlossaryTermName( private Boolean updateGlossaryNodeName( Urn targetUrn, UpdateNameInput input, QueryContext context) { - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { try { GlossaryNodeInfo glossaryNodeInfo = (GlossaryNodeInfo) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java index 
25dc36f74ef73..de5176ca440a6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java @@ -1,12 +1,13 @@ package com.linkedin.datahub.graphql.resolvers.mutate.util; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchResult; @@ -60,11 +61,7 @@ private static Filter buildNameFilter(String name) { } private static CriterionArray buildNameCriterion(@Nonnull final String name) { - return new CriterionArray( - new Criterion() - .setField(NAME_INDEX_FIELD_NAME) - .setValue(name) - .setCondition(Condition.EQUAL)); + return new CriterionArray(buildCriterion(NAME_INDEX_FIELD_NAME, Condition.EQUAL, name)); } public static SchemaFieldDataType mapSchemaFieldDataType( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java index 1dcdd988f5e7c..bf94585467814 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java @@ -2,6 +2,8 @@ import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; import static 
com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; @@ -58,7 +60,12 @@ public class DomainUtils { private DomainUtils() {} public static boolean isAuthorizedToUpdateDomainsForEntity( - @Nonnull QueryContext context, Urn entityUrn) { + @Nonnull QueryContext context, Urn entityUrn, EntityClient entityClient) { + + if (GlossaryUtils.canUpdateGlossaryEntity(entityUrn, context, entityClient)) { + return true; + } + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( ImmutableList.of( @@ -118,16 +125,9 @@ public static void validateDomain( private static List buildRootDomainCriteria() { final List criteria = new ArrayList<>(); - criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("false") - .setCondition(Condition.EQUAL)); - criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("") - .setCondition(Condition.IS_NULL)); + criteria.add(buildCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, "false")); + + criteria.add(buildIsNullCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME)); return criteria; } @@ -135,25 +135,17 @@ private static List buildRootDomainCriteria() { private static List buildParentDomainCriteria(@Nonnull final Urn parentDomainUrn) { final List criteria = new ArrayList<>(); + criteria.add(buildCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, "true")); + criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("true") - .setCondition(Condition.EQUAL)); - criteria.add( - new Criterion() - .setField(PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue(parentDomainUrn.toString()) - .setCondition(Condition.EQUAL)); + buildCriterion( + 
PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, parentDomainUrn.toString())); return criteria; } private static Criterion buildNameCriterion(@Nonnull final String name) { - return new Criterion() - .setField(NAME_INDEX_FIELD_NAME) - .setValue(name) - .setCondition(Condition.EQUAL); + return buildCriterion(NAME_INDEX_FIELD_NAME, Condition.EQUAL, name); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index cac0cca2682e8..537562a3b7d98 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.mutate.util; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -95,11 +97,7 @@ private static Criterion buildFormCriterion( private static Criterion buildFormCriterion( @Nonnull final String formUrn, @Nonnull final String field, final boolean negated) { - return new Criterion() - .setField(field) - .setValue(formUrn) - .setCondition(Condition.EQUAL) - .setNegated(negated); + return buildCriterion(field, Condition.EQUAL, negated, formUrn); } private static boolean isActorExplicitlyAssigned( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java index 0d8e505a948e5..9ff908b4ee37f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java @@ -36,6 +36,21 @@ public static boolean canManageGlossaries(@Nonnull QueryContext context) { context.getOperationContext(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); } + // Returns whether this is a glossary entity and whether you can edit this glossary entity with + // the + // Manage all children or Manage direct children privileges + public static boolean canUpdateGlossaryEntity( + Urn targetUrn, QueryContext context, EntityClient entityClient) { + final boolean isGlossaryEntity = + targetUrn.getEntityType().equals(Constants.GLOSSARY_TERM_ENTITY_NAME) + || targetUrn.getEntityType().equals(Constants.GLOSSARY_NODE_ENTITY_NAME); + if (!isGlossaryEntity) { + return false; + } + final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, entityClient); + return GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, entityClient); + } + /** * Returns true if the current user is able to create, delete, or move Glossary Terms and Nodes * under a parent Node. 
They can do this with either the global MANAGE_GLOSSARIES privilege, or if diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 2f2b52f7ab586..b9a12a19f617a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -20,6 +20,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; @@ -195,7 +196,12 @@ private static void removeOwnersIfExists( } public static void validateAuthorizedToUpdateOwners( - @Nonnull QueryContext context, Urn resourceUrn) { + @Nonnull QueryContext context, Urn resourceUrn, EntityClient entityClient) { + + if (GlossaryUtils.canUpdateGlossaryEntity(resourceUrn, context, entityClient)) { + return; + } + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( ImmutableList.of( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 28334b2c0af9a..77f6eb285ecc5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -2,7 +2,6 @@ import static 
com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -179,7 +178,7 @@ private RecommendationParams mapRecommendationParams( criterion -> FacetFilter.builder() .setField(criterion.getField()) - .setValues(ImmutableList.of(criterion.getValue())) + .setValues(criterion.getValues()) .build()) .collect(Collectors.toList())); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java index 5f69512e5946b..770c8a0d749c3 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/structuredproperties/UpsertStructuredPropertiesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.structuredproperties; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.Constants.SCHEMA_FIELD_ENTITY_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTIES_ASPECT_NAME; import com.datahub.authentication.Authentication; @@ -74,7 +75,9 @@ public CompletableFuture mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -67,7 +69,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver 
resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -106,6 +108,7 @@ public void testGetSuccessExistingDomains() throws Exception { .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -133,7 +136,7 @@ public void testGetSuccessExistingDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -177,6 +180,7 @@ public void testGetSuccessUnsetDomains() throws Exception { .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -204,7 +208,7 @@ public void testGetSuccessUnsetDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -234,6 +238,7 @@ public void testGetSuccessUnsetDomains() throws Exception { @Test public void testGetFailureDomainDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -248,7 +253,7 @@ public void 
testGetFailureDomainDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_1_URN)), eq(true))) .thenReturn(false); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -269,6 +274,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -292,7 +298,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_1_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -313,8 +319,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -335,12 +342,13 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) 
.ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java index ad5d7f1ef6b06..5be65703846a9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.domain; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -45,11 +46,7 @@ public void testGetSuccess() throws Exception { final String childUrn = "urn:li:dataset:(test,test,test)"; final String domainUrn = "urn:li:domain:test-domain"; - final Criterion filterCriterion = - new Criterion() - .setField("domains.keyword") - .setCondition(Condition.EQUAL) - .setValue(domainUrn); + final Criterion filterCriterion = buildCriterion("domains.keyword", Condition.EQUAL, domainUrn); Mockito.when( mockClient.searchAcrossEntities( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java index 60787fc47c88a..cb9df747572c4 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -16,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -76,11 +76,7 @@ public void testGetSuccess() throws Exception { private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java index 51760ff9d37f2..29af303f7db2d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java @@ -1,5 
+1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -16,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -76,11 +76,7 @@ public void testGetSuccess() throws Exception { private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index b239e0300ffcc..3222cc8c1878f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -20,6 +20,7 @@ import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.AddOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import 
com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -39,6 +40,7 @@ public class AddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -66,7 +68,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -102,6 +104,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwnerNewType() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -138,7 +141,7 @@ public void testGetSuccessExistingOwnerNewType() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -168,6 +171,7 @@ public void testGetSuccessExistingOwnerNewType() throws Exception { @Test public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -201,7 +205,7 @@ public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); 
// Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -231,6 +235,7 @@ public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { @Test public void testGetSuccessMultipleOwnerTypes() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -281,7 +286,7 @@ public void testGetSuccessMultipleOwnerTypes() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -329,6 +334,7 @@ public void testGetSuccessMultipleOwnerTypes() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -343,7 +349,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_1_URN)), eq(true))) .thenReturn(false); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -367,6 +373,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -381,7 +388,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_1_URN)), eq(true))) .thenReturn(true); - AddOwnersResolver 
resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -405,8 +412,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -430,12 +438,14 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), any(AspectsBatchImpl.class), Mockito.anyBoolean()); - AddOwnersResolver resolver = new AddOwnersResolver(Mockito.mock(EntityService.class)); + AddOwnersResolver resolver = + new AddOwnersResolver(Mockito.mock(EntityService.class), mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java index 8275f9f83ef83..2071b01c10558 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -19,6 +19,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import 
com.linkedin.datahub.graphql.resolvers.mutate.BatchAddOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -39,6 +40,7 @@ public class BatchAddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -77,7 +79,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -127,6 +129,7 @@ public void testGetSuccessExistingOwners() throws Exception { .setOwner(Urn.createFromString(TEST_OWNER_URN_1)) .setType(OwnershipType.TECHNICAL_OWNER)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -176,7 +179,7 @@ public void testGetSuccessExistingOwners() throws Exception { eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -218,6 +221,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -232,7 +236,7 @@ public void 
testGetFailureOwnerDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(false); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -268,6 +272,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -291,7 +296,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -327,8 +332,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -364,12 +370,13 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) 
.ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java index 9ea9ac693b98e..24380e2e52d84 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -16,6 +16,7 @@ import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -36,6 +37,7 @@ public class BatchRemoveOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -62,7 +64,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_2)), eq(true))) .thenReturn(true); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver 
QueryContext mockContext = getMockAllowContext(); @@ -84,6 +86,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); final Ownership oldOwners1 = new Ownership() @@ -129,7 +132,7 @@ public void testGetSuccessExistingOwners() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_2)), eq(true))) .thenReturn(true); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -151,6 +154,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -174,7 +178,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(true); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -196,8 +200,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // 
Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -219,12 +224,13 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java index 129866bb0fa07..1b33118bd154a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -21,7 +22,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import 
com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntityArray; @@ -348,13 +348,7 @@ private static Filter createFilter(String field, String value) { new ConjunctiveCriterion() .setAnd( new CriterionArray( - ImmutableList.of( - new Criterion() - .setField(field) - .setValue(value) - .setCondition(Condition.EQUAL) - .setNegated(false) - .setValues(new StringArray(ImmutableList.of(value)))))))); + ImmutableList.of(buildCriterion(field, Condition.EQUAL, value)))))); } private static DataHubViewInfo getViewInfo(Filter viewFilter) { diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java index 45190bbfc93e5..17ed6ef5632a1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.search; import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -20,9 +21,9 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.query.AutoCompleteEntityArray; import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import 
com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -264,10 +265,7 @@ private static DataHubViewInfo createViewInfo(StringArray entityNames) { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java index 86508f1fd2742..a601a815453b2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -21,7 +22,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntityArray; @@ -57,10 +57,7 @@ public static void testApplyViewNullBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new 
StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -135,10 +132,7 @@ public static void testApplyViewBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -163,13 +157,7 @@ public static void testApplyViewBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("baseField") - .setValue("baseTest") - .setCondition(Condition.EQUAL) - .setNegated(false) - .setValues( - new StringArray(ImmutableList.of("baseTest")))))))); + buildCriterion("baseField", Condition.EQUAL, "baseTest")))))); EntityClient mockClient = initMockEntityClient( @@ -236,10 +224,7 @@ public static void testApplyViewNullBaseEntityTypes() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -306,10 +291,7 @@ public static void testApplyViewEmptyBaseEntityTypes() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java index 
8f23f0a624576..832aa97d9216a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java @@ -1,11 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.search; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import java.util.List; @@ -25,10 +26,7 @@ public static void testApplyViewToFilterNullBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); Filter result = SearchUtils.combineFilters(null, viewFilter); Assert.assertEquals(viewFilter, result); @@ -45,28 +43,14 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues( - new StringArray(ImmutableList.of("test2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new 
Criterion() - .setField("field4") - .setValue("test4") - .setValues( - new StringArray(ImmutableList.of("test4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"))))))); Filter viewFilter = new Filter() @@ -76,10 +60,7 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ -92,36 +73,16 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("field") - .setValue("test") - .setValues( - new StringArray(ImmutableList.of("test")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("field", Condition.EQUAL, "test")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("field") - .setValue("test") - .setValues( - new StringArray(ImmutableList.of("test"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("field", Condition.EQUAL, "test"))))))); Assert.assertEquals(expectedResult, result); } @@ -137,28 +98,14 @@ public static void 
testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues( - new StringArray(ImmutableList.of("test2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues( - new StringArray(ImmutableList.of("test4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"))))))); Filter viewFilter = new Filter() @@ -169,31 +116,16 @@ public static void testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray( - ImmutableList.of("viewTest4"))))))))); + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4"))))))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ 
-206,91 +138,38 @@ public static void testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray(ImmutableList.of("viewTest4")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new 
StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray( - ImmutableList.of("viewTest4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4"))))))); Assert.assertEquals(expectedResult, result); } @@ -302,28 +181,16 @@ public static void testApplyViewToFilterV1Filter() { .setCriteria( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2")))))); + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))); Filter viewFilter = new Filter() .setCriteria( new CriterionArray( ImmutableList.of( - new 
Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues(new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues(new StringArray(ImmutableList.of("viewTest2")))))); + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion("viewField2", Condition.EQUAL, "viewTest2")))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ -336,25 +203,11 @@ public static void testApplyViewToFilterV1Filter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray( - ImmutableList.of("viewTest2"))))))))); + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2"))))))); Assert.assertEquals(expectedResult, result); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java index c009cf37c5397..8f638e4ff9257 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static 
com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.testng.Assert.*; @@ -24,7 +25,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -108,28 +108,17 @@ public void testGetSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. 
- .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java index b5c0531db792b..8fa3b098f2041 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -8,7 +9,6 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.DataHubViewType; import com.linkedin.datahub.graphql.generated.ListGlobalViewsInput; @@ -18,7 +18,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -55,16 +54,10 @@ public void testGetSuccessInput() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new 
Criterion() - .setField("type") - .setValue(DataHubViewType.GLOBAL.toString()) - .setValues( - new StringArray( - ImmutableList.of( - DataHubViewType.GLOBAL - .toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "type", + Condition.EQUAL, + DataHubViewType.GLOBAL.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java index 85d24f9251eaa..278aeef7b8da4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -8,7 +9,6 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.DataHubViewType; import com.linkedin.datahub.graphql.generated.ListMyViewsInput; @@ -17,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -57,25 +56,14 @@ public void testGetSuccessInput1() 
throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("createdBy") - .setValue(TEST_USER.toString()) - .setValues( - new StringArray( - ImmutableList.of( - TEST_USER.toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false), - new Criterion() - .setField("type") - .setValue(DataHubViewType.GLOBAL.toString()) - .setValues( - new StringArray( - ImmutableList.of( - DataHubViewType.GLOBAL - .toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "createdBy", + Condition.EQUAL, + TEST_USER.toString()), + buildCriterion( + "type", + Condition.EQUAL, + DataHubViewType.GLOBAL.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) @@ -123,15 +111,10 @@ public void testGetSuccessInput2() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("createdBy") - .setValue(TEST_USER.toString()) - .setValues( - new StringArray( - ImmutableList.of( - TEST_USER.toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "createdBy", + Condition.EQUAL, + TEST_USER.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java index 86a502b40b936..45c2c31e3159b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static 
org.mockito.ArgumentMatchers.anyBoolean; import static org.testng.Assert.*; @@ -30,7 +31,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -109,28 +109,17 @@ public void testGetSuccessGlobalViewIsCreator() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } @@ -173,28 +162,17 @@ public void testGetSuccessGlobalViewManageGlobalViews() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. 
- .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java index d142be1321a5c..c33aa629138f1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -22,7 +23,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -148,26 +148,14 @@ public void testMapDefinition() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setNegated(false) - .setValues( - new StringArray( - ImmutableList.of("value1", "value2"))) - .setValue("value1") // Disgraceful - .setField("test1") // Consider whether we - // should NOT go through - // the keyword mapping. 
- .setCondition(Condition.IN), - new Criterion() - .setNegated(true) - .setValues( - new StringArray( - ImmutableList.of("value3", "value4"))) - .setValue("value3") // Disgraceful - .setField("test2") // Consider whether we - // should NOT go through - // the keyword mapping. - .setCondition(Condition.CONTAIN)))))))); + buildCriterion( + "test1", Condition.IN, "value1", "value2"), + buildCriterion( + "test2", + Condition.CONTAIN, + true, + "value3", + "value4")))))))); assertEquals(ViewUtils.mapDefinition(input, mock(AspectRetriever.class)), expectedResult); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java index 685cccf27ccc0..8f3750c39f670 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.view; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -25,7 +26,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.r2.RemoteInvocationException; @@ -74,13 +74,11 @@ public class DataHubViewTypeTest { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test") - .setCondition(Condition.EQUAL)))))))) + buildCriterion( + "test", + Condition.EQUAL, + "value1", + 
"value2")))))))) .setEntityTypes( new StringArray( ImmutableList.of( @@ -110,38 +108,30 @@ public class DataHubViewTypeTest { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test") - .setCondition(Condition.EQUAL), - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL)))), + buildCriterion( + "test", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL), - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL)))))))) + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2")))))))) .setEntityTypes( new StringArray( ImmutableList.of( diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java index dea98c5cbcb13..8b33e4e7c2164 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/SystemUpdateConfig.java @@ -12,12 +12,13 @@ import com.linkedin.gms.factory.kafka.common.TopicConventionFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.search.BaseElasticSearchComponentsFactory; -import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.config.kafka.KafkaConfiguration; 
import com.linkedin.metadata.dao.producer.KafkaEventProducer; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.EntityServiceAspectRetriever; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.SystemGraphRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.SearchServiceSearchRetriever; @@ -145,7 +146,7 @@ protected OperationContext javaSystemOperationContext( @Nonnull final EntityRegistry entityRegistry, @Nonnull final EntityService entityService, @Nonnull final RestrictedService restrictedService, - @Nonnull final GraphRetriever graphRetriever, + @Nonnull final GraphService graphService, @Nonnull final SearchService searchService, @Qualifier("baseElasticSearchComponents") BaseElasticSearchComponentsFactory.BaseElasticSearchComponents components) { @@ -159,6 +160,9 @@ protected OperationContext javaSystemOperationContext( SearchServiceSearchRetriever searchServiceSearchRetriever = SearchServiceSearchRetriever.builder().searchService(searchService).build(); + SystemGraphRetriever systemGraphRetriever = + SystemGraphRetriever.builder().graphService(graphService).build(); + OperationContext systemOperationContext = OperationContext.asSystem( operationContextConfig, @@ -168,11 +172,12 @@ protected OperationContext javaSystemOperationContext( components.getIndexConvention(), RetrieverContext.builder() .aspectRetriever(entityServiceAspectRetriever) - .graphRetriever(graphRetriever) + .graphRetriever(systemGraphRetriever) .searchRetriever(searchServiceSearchRetriever) .build()); entityServiceAspectRetriever.setSystemOperationContext(systemOperationContext); + systemGraphRetriever.setSystemOperationContext(systemOperationContext); searchServiceSearchRetriever.setSystemOperationContext(systemOperationContext); return 
systemOperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java similarity index 85% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java index 4956254062ff9..a973876c6715f 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/ReindexDataJobViaNodesCLLConfig.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexDataJobViaNodesCLLConfig.java @@ -1,7 +1,8 @@ -package com.linkedin.datahub.upgrade.config; +package com.linkedin.datahub.upgrade.config.graph; +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; import io.datahubproject.metadata.context.OperationContext; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java new file mode 100644 index 0000000000000..97715573eb51f --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/config/graph/ReindexEdgeStatusConfig.java @@ -0,0 +1,31 @@ +package com.linkedin.datahub.upgrade.config.graph; + +import com.linkedin.datahub.upgrade.config.SystemUpdateCondition; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import 
com.linkedin.datahub.upgrade.system.graph.edgestatus.ReindexEdgeStatus; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Conditional; +import org.springframework.context.annotation.Configuration; + +@Configuration +@Conditional(SystemUpdateCondition.NonBlockingSystemUpdateCondition.class) +public class ReindexEdgeStatusConfig { + + @Bean + public NonBlockingSystemUpgrade reindexEdgeStatus( + final OperationContext opContext, + final EntityService entityService, + final AspectDao aspectDao, + @Value("${elasticsearch.search.graph.graphStatusEnabled}") final boolean featureEnabled, + @Value("${systemUpdate.edgeStatus.enabled}") final boolean enabled, + @Value("${systemUpdate.edgeStatus.batchSize}") final Integer batchSize, + @Value("${systemUpdate.edgeStatus.delayMs}") final Integer delayMs, + @Value("${systemUpdate.edgeStatus.limit}") final Integer limit) { + return new ReindexEdgeStatus( + opContext, entityService, aspectDao, featureEnabled && enabled, batchSize, delayMs, limit); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java index e6213a164febf..c197c6fd9c610 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java @@ -1,6 +1,9 @@ package com.linkedin.datahub.upgrade.system.browsepaths; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static 
com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; @@ -9,7 +12,6 @@ import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -152,13 +154,10 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S private Filter backfillBrowsePathsV2Filter() { // Condition: has `browsePaths` AND does NOT have `browsePathV2` - Criterion missingBrowsePathV2 = new Criterion(); - missingBrowsePathV2.setCondition(Condition.IS_NULL); - missingBrowsePathV2.setField("browsePathV2"); + Criterion missingBrowsePathV2 = buildIsNullCriterion("browsePathV2"); + // Excludes entities without browsePaths - Criterion hasBrowsePathV1 = new Criterion(); - hasBrowsePathV1.setCondition(Condition.EXISTS); - hasBrowsePathV1.setField("browsePaths"); + Criterion hasBrowsePathV1 = buildExistsCriterion("browsePaths"); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingBrowsePathV2); @@ -177,13 +176,8 @@ private Filter backfillBrowsePathsV2Filter() { private Filter backfillDefaultBrowsePathsV2Filter() { // Condition: has default `browsePathV2` - Criterion hasDefaultBrowsePathV2 = new Criterion(); - hasDefaultBrowsePathV2.setCondition(Condition.EQUAL); - hasDefaultBrowsePathV2.setField("browsePathV2"); - StringArray values = new StringArray(); - values.add(DEFAULT_BROWSE_PATH_V2); - hasDefaultBrowsePathV2.setValues(values); - hasDefaultBrowsePathV2.setValue(DEFAULT_BROWSE_PATH_V2); // not used, but required field? 
+ Criterion hasDefaultBrowsePathV2 = + buildCriterion("browsePathV2", Condition.EQUAL, DEFAULT_BROWSE_PATH_V2); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(hasDefaultBrowsePathV2); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java new file mode 100644 index 0000000000000..6b7286a6a0639 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexEdgeStatus.java @@ -0,0 +1,50 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import com.google.common.collect.ImmutableList; +import com.linkedin.datahub.upgrade.UpgradeStep; +import com.linkedin.datahub.upgrade.system.NonBlockingSystemUpgrade; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; + +/** + * A job that reindexes all status aspects as part of the graph edges containing status information. + * This is required to make sure previously written status information is present in the graph + * index. 
+ */ +@Slf4j +public class ReindexEdgeStatus implements NonBlockingSystemUpgrade { + + private final List _steps; + + public ReindexEdgeStatus( + @Nonnull OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + boolean enabled, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + if (enabled) { + _steps = + ImmutableList.of( + new ReindexReindexEdgeStatusStep( + opContext, entityService, aspectDao, batchSize, batchDelayMs, limit)); + } else { + _steps = ImmutableList.of(); + } + } + + @Override + public String id() { + return this.getClass().getName(); + } + + @Override + public List steps() { + return _steps; + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java new file mode 100644 index 0000000000000..6543f82e74563 --- /dev/null +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/edgestatus/ReindexReindexEdgeStatusStep.java @@ -0,0 +1,56 @@ +package com.linkedin.datahub.upgrade.system.graph.edgestatus; + +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; + +import com.linkedin.datahub.upgrade.UpgradeContext; +import com.linkedin.datahub.upgrade.system.AbstractMCLStep; +import com.linkedin.metadata.entity.AspectDao; +import com.linkedin.metadata.entity.EntityService; +import io.datahubproject.metadata.context.OperationContext; +import javax.annotation.Nonnull; +import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.Nullable; + +@Slf4j +public class ReindexReindexEdgeStatusStep extends AbstractMCLStep { + + public ReindexReindexEdgeStatusStep( + OperationContext opContext, + EntityService entityService, + AspectDao aspectDao, + Integer batchSize, + Integer batchDelayMs, + Integer limit) { + super(opContext, entityService, aspectDao, batchSize, batchDelayMs, limit); + } + 
+ @Override + public String id() { + return "edge-status-reindex-v1"; + } + + @Nonnull + @Override + protected String getAspectName() { + return STATUS_ASPECT_NAME; + } + + @Nullable + @Override + protected String getUrnLike() { + return null; + } + + @Override + /** + * Returns whether the upgrade should be skipped. Uses previous run history or the environment + * variable to determine whether to skip. + */ + public boolean skip(UpgradeContext context) { + boolean envFlagRecommendsSkip = Boolean.parseBoolean(System.getenv("SKIP_REINDEX_EDGE_STATUS")); + if (envFlagRecommendsSkip) { + log.info("Environment variable SKIP_REINDEX_EDGE_STATUS is set to true. Skipping."); + } + return (super.skip(context) || envFlagRecommendsSkip); + } +} diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java similarity index 95% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java index fc0b44f57ab49..7a4ca9586f155 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLL.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLL.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.upgrade.UpgradeStep; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java similarity index 
96% rename from datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java rename to datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java index cf580670ee3a9..e3e07f99bb1ee 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/vianodes/ReindexDataJobViaNodesCLLStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/graph/vianodes/ReindexDataJobViaNodesCLLStep.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.upgrade.system.vianodes; +package com.linkedin.datahub.upgrade.system.graph.vianodes; import static com.linkedin.metadata.Constants.*; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java index ad28e6b6382d4..42361ebe59b85 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade.system.policyfields; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; @@ -17,7 +18,6 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -246,9 +246,7 @@ private Optional> 
ingestPolicyFields( @NotNull private static ConjunctiveCriterion getCriterionForMissingField(String field) { - final Criterion missingPrivilegesField = new Criterion(); - missingPrivilegesField.setCondition(Condition.IS_NULL); - missingPrivilegesField.setField(field); + final Criterion missingPrivilegesField = buildIsNullCriterion(field); final CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingPrivilegesField); diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java index 55a52f072a0ca..df27d33f3a117 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/DatahubUpgradeNonBlockingTest.java @@ -9,7 +9,7 @@ import com.linkedin.datahub.upgrade.impl.DefaultUpgradeManager; import com.linkedin.datahub.upgrade.system.SystemUpdateNonBlocking; -import com.linkedin.datahub.upgrade.system.vianodes.ReindexDataJobViaNodesCLL; +import com.linkedin.datahub.upgrade.system.graph.vianodes.ReindexDataJobViaNodesCLL; import com.linkedin.metadata.boot.kafka.MockSystemUpdateDeserializer; import com.linkedin.metadata.boot.kafka.MockSystemUpdateSerializer; import com.linkedin.metadata.config.kafka.KafkaConfiguration; diff --git a/datahub-web-react/build.gradle b/datahub-web-react/build.gradle index 05af6871715ce..b9fffce173c5c 100644 --- a/datahub-web-react/build.gradle +++ b/datahub-web-react/build.gradle @@ -19,7 +19,7 @@ node { version = '21.2.0' // Version of Yarn to use. - yarnVersion = '1.22.21' + yarnVersion = '1.22.22' // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror). if (project.hasProperty('nodeDistBaseUrl')) { @@ -43,7 +43,7 @@ node { Wrappers around Yarn Tasks. 
*/ task yarnInstall(type: YarnTask) { - args = ['install'] + args = ['install', '--network-timeout', '300000'] // The node_modules directory can contain built artifacts, so // it's not really safe to cache it. diff --git a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx index cc2e1bb7b386e..537750ec279f9 100644 --- a/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx +++ b/datahub-web-react/src/app/entity/shared/tabs/Dataset/Validations/contract/utils.tsx @@ -111,4 +111,5 @@ export const DATA_QUALITY_ASSERTION_TYPES = new Set([ AssertionType.Sql, AssertionType.Field, AssertionType.Dataset, + AssertionType.Custom, ]); diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 17ad6f881b0ab..7ce4082c42f61 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -1012,7 +1012,7 @@ fragment domainEntitiesFields on Domain { entities(input: { start: 0, count: 0 }) { total } - dataProducts: entities(input: { start: 0, count: 0, filters: [{ field: "_entityType", value: "DATA_PRODUCT" }] }) { + dataProducts: entities(input: { start: 0, count: 0, filters: [{ field: "_entityType", values: "DATA_PRODUCT" }] }) { total } children: relationships(input: { types: ["IsPartOf"], direction: INCOMING, start: 0, count: 0 }) { diff --git a/docker/datahub-frontend/env/docker.env b/docker/datahub-frontend/env/docker.env index 7e490813c93cf..74e7680f1b5ec 100644 --- a/docker/datahub-frontend/env/docker.env +++ b/docker/datahub-frontend/env/docker.env @@ -16,8 +16,8 @@ JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf # SSL_TRUSTSTORE_TYPE=jks # SSL_TRUSTSTORE_PASSWORD=MyTruststorePassword -# Uncomment to enable Metadata Service Authentication -# METADATA_SERVICE_AUTH_ENABLED=true +# 
Uncomment to disable Metadata Service Authentication +# METADATA_SERVICE_AUTH_ENABLED=false # Uncomment & populate these configs to enable OIDC SSO in React application. # Required OIDC configs diff --git a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index 59fc4bdde02ff..9a2ce30988805 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -27,8 +27,8 @@ MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -# Uncomment to enable Metadata Service Authentication -METADATA_SERVICE_AUTH_ENABLED=false +# Uncomment to disable Metadata Service Authentication +# METADATA_SERVICE_AUTH_ENABLED=false # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/datahub-ingestion-base/Dockerfile b/docker/datahub-ingestion-base/Dockerfile index 08cf2efdcb6a1..6e885b8349a8b 100644 --- a/docker/datahub-ingestion-base/Dockerfile +++ b/docker/datahub-ingestion-base/Dockerfile @@ -8,8 +8,6 @@ ARG DEBIAN_REPO_URL=https://deb.debian.org/debian ARG UBUNTU_REPO_URL=http://ports.ubuntu.com/ubuntu-ports ARG PIP_MIRROR_URL=https://pypi.python.org/simple -FROM powerman/dockerize:0.19 AS dockerize-binary - FROM ubuntu:22.04 AS base ARG GITHUB_REPO_URL @@ -44,6 +42,7 @@ RUN apt-get update && apt-get upgrade -y \ krb5-config \ libkrb5-dev \ librdkafka-dev \ + git \ wget \ curl \ zip \ diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index de766f76cb626..6709aee98d697 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -39,6 +39,8 @@ services: context: ../ dockerfile: docker/datahub-gms/Dockerfile env_file: ./datahub-gms/env/docker.cassandra.env + environment: + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 20s diff --git 
a/docker/docker-compose-without-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml index 09a27fb4573de..11d7cd0c0c87b 100644 --- a/docker/docker-compose-without-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -6,6 +6,7 @@ services: environment: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} depends_on: datahub-upgrade: condition: service_completed_successfully diff --git a/docker/docker-compose-without-neo4j.postgres.override.yml b/docker/docker-compose-without-neo4j.postgres.override.yml index dd7590ffe09b9..b81fb6435c297 100644 --- a/docker/docker-compose-without-neo4j.postgres.override.yml +++ b/docker/docker-compose-without-neo4j.postgres.override.yml @@ -9,6 +9,7 @@ services: environment: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} depends_on: datahub-upgrade: condition: service_completed_successfully diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 748a2cc9e0416..53fcc77c6e8f3 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -42,6 +42,7 @@ services: env_file: datahub-gms/env/docker-without-neo4j.env environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 7974b66ec87db..2202f362abd99 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -40,7 +40,7 @@ services: - 
SKIP_ELASTICSEARCH_CHECK=false - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-dev} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - - METADATA_SERVICE_AUTH_ENABLED=false + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001 - BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE=false - SEARCH_SERVICE_ENABLE_CACHE=false diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 0aae870078be1..51fbe0060aa5f 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -8,6 +8,7 @@ services: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-upgrade: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index ae55861580bec..5430a8a6fcd5b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -35,6 +35,7 @@ services: image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 build: diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 834d55096468f..046ab96cf3002 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -97,7 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true - - METADATA_SERVICE_AUTH_ENABLED=false + - 
METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - NEO4J_HOST=http://neo4j:7474 - NEO4J_PASSWORD=datahub - NEO4J_URI=bolt://neo4j diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 47fb50f78e4f0..6295572aac98f 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -97,6 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true healthcheck: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 3fa13a9e56b42..ed5f203ff4d05 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -97,6 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true healthcheck: diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index c63b6d1d61b03..66616be98bec1 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -97,7 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true - - METADATA_SERVICE_AUTH_ENABLED=false + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - NEO4J_HOST=http://neo4j:7474 - NEO4J_PASSWORD=datahub - NEO4J_URI=bolt://neo4j diff --git a/docs-website/build.gradle 
b/docs-website/build.gradle index 3b78804eafd9d..1860b4a49ae23 100644 --- a/docs-website/build.gradle +++ b/docs-website/build.gradle @@ -17,7 +17,7 @@ node { version = '21.2.0' // Version of Yarn to use. - yarnVersion = '1.22.1' + yarnVersion = '1.22.22' // Base URL for fetching node distributions (set nodeDistBaseUrl if you have a mirror). if (project.hasProperty('nodeDistBaseUrl')) { @@ -64,9 +64,9 @@ task generateJsonSchema(type: Exec, dependsOn: [':metadata-ingestion:docGen']) { task yarnInstall(type: YarnTask) { logger.info('CI = "{}"', System.env.CI) if (System.env.CI != null && System.env.CI == "true") { - args = ['install','--frozen-lockfile'] + args = ['install', '--frozen-lockfile', '--network-timeout', '300000'] } else { - args = ['install'] + args = ['install', '--network-timeout', '300000'] } // The node_modules directory can contain built artifacts, so diff --git a/docs-website/docusaurus.config.js b/docs-website/docusaurus.config.js index 88a2c79a450ee..5255579628a7f 100644 --- a/docs-website/docusaurus.config.js +++ b/docs-website/docusaurus.config.js @@ -65,6 +65,12 @@ module.exports = { // isCloseable: false, // }, // }), + colorMode: { + // Only support light mode. 
+ defaultMode: 'light', + disableSwitch: true, + respectPrefersColorScheme: false, + }, navbar: { title: null, logo: { @@ -74,21 +80,15 @@ module.exports = { }, items: [ { - to: "docs/", - activeBasePath: "docs", - label: "Docs", - position: "right", - }, - { - to: "/cloud", + to: "cloud/", activeBasePath: "cloud", - html: "Cloud", + label: "Cloud", position: "right", }, { - to: "/learn", - activeBasePath: "learn", - label: "Learn", + to: "docs/", + activeBasePath: "docs", + label: "Docs", position: "right", }, { @@ -99,113 +99,79 @@ module.exports = { }, { type: "dropdown", - label: "Community", + activeBasePath: "learn", + label: "Learn", position: "right", items: [ { - to: "/slack", - label: "Join Slack", - }, - { - href: "https://forum.datahubproject.io/", - label: "Community Forum", + to: "/learn", + label: "Use Cases", }, { - to: "/events", - label: "Events", + to: "/adoption-stories", + label: "Adoption Stories", }, { - to: "/champions", - label: "Champions", + href: "https://blog.datahubproject.io/", + label: "Blog", }, { - label: "Share Your Journey", - href: "/customer-stories-survey", + href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", + label: "YouTube", }, ], }, { type: "dropdown", - label: "Resources", + label: "Community", position: "right", items: [ { - href: "https://demo.datahubproject.io/", - label: "Demo", - }, - { - href: "https://blog.datahubproject.io/", - label: "Blog", - }, - { - href: "https://feature-requests.datahubproject.io/roadmap", - label: "Roadmap", + to: "/slack", + label: "Join Slack", }, { - href: "https://github.com/datahub-project/datahub", - label: "GitHub", + href: "https://forum.datahubproject.io/", + label: "Community Forum", }, { - href: "https://www.youtube.com/channel/UC3qFQC5IiwR5fvWEqi_tJ5w", - label: "YouTube", + to: "/events", + label: "Events", }, { - href: "/adoption-stories", - label: "Adoption Stories", + to: "/champions", + label: "Champions", }, { - href: 
"https://www.youtube.com/playlist?list=PLdCtLs64vZvErAXMiqUYH9e63wyDaMBgg", - label: "DataHub Basics", + label: "Share Your Journey", + href: "/customer-stories-survey", }, ], }, { - type: "docsVersionDropdown", - position: "left", - dropdownActiveClassDisabled: true, - dropdownItemsAfter: [ - { - type: 'html', - value: '', - }, - { - type: 'html', - value: '', - }, - { - value: ` - 0.13.0 - - - `, - type: "html", - }, - { - value: ` - 0.12.1 - - - `, - type: "html", - }, - { - value: ` - 0.11.0 - - - `, - type: "html", - }, - { - value: ` - 0.10.5 - - - `, - type: "html", - }, - ], + href: "/slack", + html: ` + + + `, + position: "right", }, + { + href: "/cloud", + html: ` + +
Try DataHub Cloud Free
+ `, + position: "right", + } ], }, footer: { @@ -245,7 +211,7 @@ module.exports = { }, { label: "Adoption", - to: "docs/#adoption", + href: "/adoption-stories", }, ], }, @@ -329,6 +295,7 @@ module.exports = { require.resolve("./src/styles/global.scss"), require.resolve("./src/styles/sphinx.scss"), require.resolve("./src/styles/config-table.scss"), + require.resolve("./src/components/SecondNavbar/styles.module.scss"), ], }, pages: { diff --git a/docs-website/src/components/SecondNavbar/SecondNavbar.js b/docs-website/src/components/SecondNavbar/SecondNavbar.js new file mode 100644 index 0000000000000..e17a3bceb1d93 --- /dev/null +++ b/docs-website/src/components/SecondNavbar/SecondNavbar.js @@ -0,0 +1,55 @@ +import React from 'react'; +import { Link, useLocation } from 'react-router-dom'; +import clsx from 'clsx'; +import { useColorMode } from '@docusaurus/theme-common'; +import SearchBar from '@theme/SearchBar'; +import ColorModeToggle from '@theme/ColorModeToggle'; +import styles from './styles.module.scss'; +import DocsVersionDropdownNavbarItem from '@theme/NavbarItem/DocsVersionDropdownNavbarItem'; + +function SecondNavbarContent() { + const { colorMode, setColorMode } = useColorMode(); + const location = useLocation(); + const isDocsPath = location.pathname.startsWith('/docs'); + if (!isDocsPath) { + return null; + } + + return ( +
+
+
+ +
+
+
+ setColorMode(colorMode === 'dark' ? 'light' : 'dark')} + /> +
+
+ +
+
+
+
+ ); +} + +function SecondNavbar() { + return ( + + ); +} + +export default SecondNavbar; \ No newline at end of file diff --git a/docs-website/src/components/SecondNavbar/styles.module.scss b/docs-website/src/components/SecondNavbar/styles.module.scss new file mode 100644 index 0000000000000..eed10a6f14c2c --- /dev/null +++ b/docs-website/src/components/SecondNavbar/styles.module.scss @@ -0,0 +1,64 @@ +.secondNavbar { + background-color: #fff; + border-bottom: 1px solid #eaeaea; + z-index: 10; + color: black; + + &.darkMode { + background-color: #000; + color: #fff; + } +} + +.container, +.coreCloudSwitch, +.docsSwitchButton { + display: flex; + align-items: center; + height: 50px; +} + +.container { + padding: 0rem 1rem; +} + +.coreCloudSwitch { + width: var(--doc-sidebar-width); +} + +.docsSwitchButton { + padding: 0.5rem 1rem; + font-weight: 500; + font-size: 1.1rem; + color: black; + text-decoration: none; + + &:hover { + cursor: pointer; + text-decoration: none; + color: #007bff; + } +} + +.activeButton { + border-bottom: 2px solid #007bff; +} + +.darkMode .docsSwitchButton { + color: white; + + &:hover { + color: #4db5ff; + } +} + +.searchBox, +.colorModeToggle { + padding: 0.5rem 1rem; +} + +.navbarItemsRight { + display: flex; + margin-left: auto; + align-items: center; +} \ No newline at end of file diff --git a/docs-website/src/pages/_components/CaseStudy/caseStudyContent.js b/docs-website/src/pages/_components/CaseStudy/caseStudyContent.js index 524dbfd1b19d6..47c379027da81 100644 --- a/docs-website/src/pages/_components/CaseStudy/caseStudyContent.js +++ b/docs-website/src/pages/_components/CaseStudy/caseStudyContent.js @@ -26,9 +26,29 @@ const caseStudyData = [ tag: "Technology", backgroundImage: "https://www.notion.so/cdn-cgi/image/format=webp,width=1920/front-static/pages/product/super-duper/hero-illo.png", - image: "https://boost.space/wp-content/uploads/2023/08/notion.png", + image: 
"https://datahubproject.io/img/logos/scrollingCompanies/notion.png", link: "https://datahubproject.io/adoption-stories/#notion", }, + { + title: "Building on DataHub", + description: + "Why Pinterest chose DataHub to serve their needs.", + tag: "Technology", + backgroundImage: + "https://newsroom-archive.pinterest.com/static/images/pin32189_primary-image_hero_0db73.png", + image: "https://datahubproject.io/img/logos/companies/pinterest.png", + link: "https://www.youtube.com/watch?v=YoxTg8tQSwg", + }, + { + title: "Reliable Data Products", + description: + "How Miro leverages DataHub Cloud to deliver reliable data products.", + tag: "Technology", + backgroundImage: + "https://miro.com/blog/wp-content/uploads/2024/08/header-diagramming-s4-02.png", + image: "/img/logos/companies/miro.png", + link: "https://miro.com/careers/life-at-miro/tech/data-products-reliability-the-power-of-metadata/", + }, { title: "Working with Petabyte Scale Healthcare Data", description: @@ -36,7 +56,7 @@ const caseStudyData = [ tag: "Healthcare", backgroundImage: "https://opensource.optum.com/static/images/mesh-overview-e26ea2aaa8d3dbb1f1771b50f4e31449.png", - image: "/img/logos/companies/optum.png", + image: "https://datahubproject.io/img/logos/companies/optum.png", link: "https://datahubproject.io/adoption-stories/#optum", }, { diff --git a/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/championqualitycardssection.module.scss b/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/championqualitycardssection.module.scss index 9601938b07709..cb632fee5a0c2 100644 --- a/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/championqualitycardssection.module.scss +++ b/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/championqualitycardssection.module.scss @@ -1,3 +1,9 @@ +.subtitle { + font-size: 1.7rem; + font-weight: 500; + margin: 1.5rem 0; +} + .section { margin: 2rem 3rem 3rem 3rem; } diff --git 
a/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/index.js b/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/index.js index 9caed95973dc0..2890cd7838112 100644 --- a/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/index.js +++ b/docs-website/src/pages/champions/_components/ChampionQualityCardsSection/index.js @@ -6,7 +6,7 @@ import { CodeTwoTone, HeartTwoTone, SoundTwoTone } from "@ant-design/icons"; const ChampionQualityCardsSection = () => { return (
-

Our Champions...

+
Our Champions...
diff --git a/docs-website/src/pages/champions/index.js b/docs-website/src/pages/champions/index.js index b544defc47448..79ecf915e174d 100644 --- a/docs-website/src/pages/champions/index.js +++ b/docs-website/src/pages/champions/index.js @@ -372,7 +372,7 @@ function Champion() {
-

DataHub Champions

+
DataHub Champions

Recognizing community members who have made exceptional contributions to further the collective success of DataHub.

diff --git a/docs-website/src/pages/cloud/index.js b/docs-website/src/pages/cloud/index.js index 8ceab8aa04e4a..99768ca721078 100644 --- a/docs-website/src/pages/cloud/index.js +++ b/docs-website/src/pages/cloud/index.js @@ -34,6 +34,10 @@ function Home() {
+
+
Curious? Drop by and say hi!
+ Weekly Live Demos → +
diff --git a/docs-website/src/pages/cloud/styles.module.scss b/docs-website/src/pages/cloud/styles.module.scss index 66eafff4617db..0590b8baf12c8 100644 --- a/docs-website/src/pages/cloud/styles.module.scss +++ b/docs-website/src/pages/cloud/styles.module.scss @@ -11,9 +11,30 @@ } .bgSection { - background-color: #FAFAFA !important; - } + background-color: #F5F5F5 !important; +} +.weeklyDemoSection { + width: 100%; + padding: 80px 0; + display: flex; + justify-content: center; + flex-direction: column; + align-items: center; + background-color: #1890ff; + div { + color: white; + font-size: 1rem; + font-weight: 400; + opacity: .9; + } + a { + cursor: pointer; + color: white; + font-size: 1.2rem; + font-weight: 600; + } +} .hero { .button { diff --git a/docs-website/src/styles/global.scss b/docs-website/src/styles/global.scss index 701db3bc6ac88..222f4946b0382 100644 --- a/docs-website/src/styles/global.scss +++ b/docs-website/src/styles/global.scss @@ -262,26 +262,26 @@ div[class^="announcementBar"] { /* Hero */ -// .hero { -// padding: 5vh 0; - -// .hero__subtitle { -// font-size: 1.25em; -// max-width: 800px; - -// img { -// vertical-align: middle; -// margin-top: -0.3em; -// } -// } - -// .hero__content { -// text-align: center; -// padding: 2rem 0; -// height: 100%; -// display: flex; -// } -// } +.hero { + padding: 5vh 0; + + .hero__subtitle { + font-size: 1.25em; + max-width: 800px; + display: inline-block; + + img { + vertical-align: middle; + margin-top: -0.3em; + } + } + + .hero__content { + text-align: center; + padding: 2rem 0; + height: 100%; + } +} /* Sidebar Menu */ @@ -344,6 +344,11 @@ div[class^="announcementBar"] { } } +/* Hide Searchbar in Nav */ +.navbar--fixed-top .DocSearch { + display: none; +} + /* Search */ [data-theme="light"] .DocSearch { diff --git a/docs-website/src/theme/Layout/index.js b/docs-website/src/theme/Layout/index.js new file mode 100644 index 0000000000000..4ea66824ad2c1 --- /dev/null +++ 
b/docs-website/src/theme/Layout/index.js @@ -0,0 +1,14 @@ +import React from 'react'; +import Layout from '@theme-original/Layout'; +import SecondNavbar from '../../components/SecondNavbar/SecondNavbar'; + +export default function LayoutWrapper(props) { + return ( + <> + + + {props.children} + + + ); +} \ No newline at end of file diff --git a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js index 661d64392e67f..ed083e4e0c398 100644 --- a/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js +++ b/docs-website/src/theme/NavbarItem/DocsVersionDropdownNavbarItem.js @@ -10,12 +10,13 @@ import DropdownNavbarItem from "@theme/NavbarItem/DropdownNavbarItem"; import styles from "./styles.module.scss"; const getVersionMainDoc = (version) => version.docs.find((doc) => doc.id === version.mainDocId); + export default function DocsVersionDropdownNavbarItem({ mobile, - docsPluginId, - dropdownActiveClassDisabled, - dropdownItemsBefore, - dropdownItemsAfter, + docsPluginId = 'default', + dropdownActiveClassDisabled = false, + dropdownItemsBefore = [], + dropdownItemsAfter = [], ...props }) { const { search, hash } = useLocation(); @@ -23,20 +24,60 @@ export default function DocsVersionDropdownNavbarItem({ const versions = useVersions(docsPluginId); const { savePreferredVersionName } = useDocsPreferredVersion(docsPluginId); const versionLinks = versions.map((version) => { - // We try to link to the same doc, in another version - // When not possible, fallback to the "main doc" of the version const versionDoc = activeDocContext.alternateDocVersions[version.name] ?? 
getVersionMainDoc(version); return { label: version.label, - // preserve ?search#hash suffix on version switches to: `${versionDoc.path}${search}${hash}`, isActive: () => version === activeDocContext.activeVersion, onClick: () => savePreferredVersionName(version.name), }; }); - const items = [...dropdownItemsBefore, ...versionLinks, ...dropdownItemsAfter]; + + const archivedVersions = [ + { + type: 'html', + value: '', + }, + { + type: 'html', + value: '', + }, + { + value: ` + 0.13.0 + + + `, + type: "html", + }, + { + value: ` + 0.12.1 + + + `, + type: "html", + }, + { + value: ` + 0.11.0 + + + `, + type: "html", + }, + { + value: ` + 0.10.5 + + + `, + type: "html", + }, + ]; + + const items = [...dropdownItemsBefore, ...versionLinks, ...archivedVersions, ...dropdownItemsAfter]; const dropdownVersion = useDocsVersionCandidates(docsPluginId)[0]; - // Mobile dropdown is handled a bit differently const dropdownLabel = mobile && items.length > 1 ? translate({ @@ -46,9 +87,7 @@ export default function DocsVersionDropdownNavbarItem({ }) : dropdownVersion.label; const dropdownTo = mobile && items.length > 1 ? undefined : getVersionMainDoc(dropdownVersion).path; - // We don't want to render a version dropdown with 0 or 1 item. 
If we build - // the site with a single docs version (onlyIncludeVersions: ['1.0.0']), - // We'd rather render a button instead of a dropdown + if (items.length <= 1) { return ( false : undefined} /> ); -} +} \ No newline at end of file diff --git a/docs-website/static/img/logos/companies/miro.png b/docs-website/static/img/logos/companies/miro.png new file mode 100644 index 0000000000000..b5d02ce33c60d Binary files /dev/null and b/docs-website/static/img/logos/companies/miro.png differ diff --git a/docs/api/tutorials/forms.md b/docs/api/tutorials/forms.md index eb555910f18eb..cf51f1579f1c8 100644 --- a/docs/api/tutorials/forms.md +++ b/docs/api/tutorials/forms.md @@ -77,7 +77,7 @@ For example, below file represents a form `123456` You can see the full example ```yaml - id: 123456 # urn: "urn:li:form:123456" # optional if id is provided - type: VERIFICATION # Supported Types: DOCUMENTATION, VERIFICATION + type: VERIFICATION # Supported Types: COMPLETION(DOCUMENTATION), VERIFICATION name: "Metadata Initiative 2023" description: "How we want to ensure the most important data assets in our organization have all of the most important and expected pieces of metadata filled out" prompts: diff --git a/docs/automations/docs-propagation.md b/docs/automations/docs-propagation.md index a637afcde4dca..e8eba08d3640e 100644 --- a/docs/automations/docs-propagation.md +++ b/docs/automations/docs-propagation.md @@ -47,7 +47,7 @@ Notice that the user must have the `Manage Ingestion` permission to view and ena

-3**Enable Documentation Propagation**: Locate the 'Documentation Propagation' section and toggle the feature to enable it for column-level and asset-level propagation. +3. **Enable Documentation Propagation**: Locate the 'Documentation Propagation' section and toggle the feature to enable it for column-level and asset-level propagation. Currently, Column Level propagation is supported, with asset level propagation coming soon.

diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index 67dd9a734e67f..49b0ea1d69ae1 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -138,7 +138,7 @@ datahub-frontend: enabled: true image: repository: acryldata/datahub-frontend-react - tag: "latest" + tag: "head" ingress: enabled: true annotations: @@ -154,6 +154,7 @@ datahub-frontend: paths: - /* ``` +Do not use the 'latest' or 'debug' tags for any of the images, as those are not supported and are present only due to legacy reasons. Please use 'head' or version-specific tags, like v0.8.40. For production, we recommend using version-specific tags, not 'head'. You need to request a certificate in the AWS Certificate Manager by following this [guide](https://docs.aws.amazon.com/acm/latest/userguide/gs-acm-request-public.html), and replace certificate-arn with diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 37e21d2395629..d002331e3929e 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,10 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +- #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. +- #11484 - Metadata service authentication enabled by default +- #11484 - Rest API authorization enabled by default + ### Potential Downtime ### Deprecations diff --git a/docs/lineage/openlineage.md b/docs/lineage/openlineage.md index c91aa7499802c..71374a51d578b 100644 --- a/docs/lineage/openlineage.md +++ b/docs/lineage/openlineage.md @@ -56,9 +56,12 @@ Example: "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client" } ``` + ##### How to set up Airflow -Follow the Airflow guide to setup the Airflow DAGs to send lineage information to DataHub. 
The guide can be found [here](https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html + +Follow the Airflow guide to set up the Airflow DAGs to send lineage information to DataHub. The guide can be found [here](https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html). The transport should look like this: + ```json {"type": "http", "url": "https://GMS_SERVER_HOST:GMS_PORT/openapi/openlineage/", @@ -71,12 +74,13 @@ The transport should look like this: ``` #### Known Limitations + With Spark and Airflow we recommend using the Spark Lineage or DataHub's Airflow plugin for tighter integration with DataHub. -- **[PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) Support**: While the REST endpoint supports OpenLineage messages, full [PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns)) support is not yet available. +- **[PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) Support**: While the REST endpoint supports OpenLineage messages, full [PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) support is not yet available in the OpenLineage endpoint, but it is available in the Acryl Spark Plugin. + +etc... -- **Column-level Lineage**: DataHub's current OpenLineage support does not provide full column-level lineage tracking. -- etc... ### 2. Spark Event Listener Plugin DataHub's Spark Event Listener plugin enhances OpenLineage support by providing additional features such as PathSpec support, column-level lineage, and more. 
diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java index 8777be57e1bd8..e999471488dd7 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/Edge.java @@ -14,6 +14,7 @@ import java.util.Optional; import java.util.stream.Collectors; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import lombok.AllArgsConstructor; import lombok.Data; import lombok.EqualsAndHashCode; @@ -36,6 +37,10 @@ public class Edge { @EqualsAndHashCode.Include private Urn lifecycleOwner; // An entity through which the edge between source and destination is created @EqualsAndHashCode.Include private Urn via; + @EqualsAndHashCode.Exclude @Nullable private Boolean sourceStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean destinationStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean viaStatus; + @EqualsAndHashCode.Exclude @Nullable private Boolean lifecycleOwnerStatus; // For backwards compatibility public Edge( @@ -57,6 +62,38 @@ public Edge( updatedActor, properties, null, + null, + null, + null, + null, + null); + } + + public Edge( + Urn source, + Urn destination, + String relationshipType, + Long createdOn, + Urn createdActor, + Long updatedOn, + Urn updatedActor, + Map properties, + Urn lifecycleOwner, + Urn via) { + this( + source, + destination, + relationshipType, + createdOn, + createdActor, + updatedOn, + updatedActor, + properties, + lifecycleOwner, + via, + null, + null, + null, null); } @@ -91,6 +128,10 @@ public String toDocId(@Nonnull String idHashAlgo) { public static final String EDGE_FIELD_LIFECYCLE_OWNER = "lifecycleOwner"; public static final String EDGE_SOURCE_URN_FIELD = "source.urn"; public static final String EDGE_DESTINATION_URN_FIELD = "destination.urn"; + public static final String 
EDGE_SOURCE_STATUS = "source.removed"; + public static final String EDGE_DESTINATION_STATUS = "destination.removed"; + public static final String EDGE_FIELD_VIA_STATUS = "viaRemoved"; + public static final String EDGE_FIELD_LIFECYCLE_OWNER_STATUS = "lifecycleOwnerRemoved"; public static final List> KEY_SORTS = ImmutableList.of( diff --git a/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java new file mode 100644 index 0000000000000..2fc2f4b588e8b --- /dev/null +++ b/entity-registry/src/main/java/com/linkedin/metadata/aspect/models/graph/EdgeUrnType.java @@ -0,0 +1,8 @@ +package com.linkedin.metadata.aspect.models.graph; + +public enum EdgeUrnType { + SOURCE, + DESTINATION, + VIA, + LIFECYCLE_OWNER +} diff --git a/gradle.properties b/gradle.properties index 60ea124684a04..e42e18dab677b 100644 --- a/gradle.properties +++ b/gradle.properties @@ -2,8 +2,13 @@ org.gradle.configureondemand=true org.gradle.parallel=true org.gradle.caching=true +# Cycle daemons after 30m +org.gradle.daemon.idletimeout=1800000 + # Increase gradle JVM memory to 5GB to allow tests to run locally -org.gradle.jvmargs=-Xmx5120m +org.gradle.jvmargs=-Xmx5120m -XX:MaxMetaspaceSize=512m +org.gradle.workers.max=4 + # Increase retries to 5 (from default of 3) and increase interval from 125ms to 1s. # Based on this thread https://github.com/gradle/gradle/issues/4629, it's unclear # if we should be using systemProp or not. We're using both for now. 
diff --git a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java index b53d868e6e878..7583a4efd6425 100644 --- a/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java +++ b/metadata-auth/auth-api/src/main/java/com/datahub/authentication/Authentication.java @@ -4,8 +4,10 @@ import java.util.Map; import java.util.Objects; import javax.annotation.Nonnull; +import lombok.EqualsAndHashCode; /** Class representing an authenticated actor accessing DataHub. */ +@EqualsAndHashCode public class Authentication { private final Actor authenticatedActor; diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index 52a19777dd033..9f366f579e5fc 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -175,7 +175,7 @@ To integrate with dbt tests, the `dbt` source needs access to the `run_results.j 1. Run `dbt build` 2. Copy the `target/run_results.json` file to a separate location. This is important, because otherwise subsequent `dbt` commands will overwrite the run results. 3. Run `dbt docs generate` to generate the `manifest.json` and `catalog.json` files -4. The dbt source makes use of the manifest, catalog, and run results file, and hence will need to be moved to a location accessible to the `dbt` source (e.g. s3 or local file system). In the ingestion recipe, the `test_results_path` config must be set to the location of the `run_results.json` file from the `dbt build` or `dbt test` run. +4. The dbt source makes use of the manifest, catalog, and run results file, and hence will need to be moved to a location accessible to the `dbt` source (e.g. s3 or local file system). In the ingestion recipe, the `run_results_paths` config must be set to the location of the `run_results.json` file from the `dbt build` or `dbt test` run. 
The connector will produce the following things: @@ -219,7 +219,8 @@ source: config: manifest_path: _path_to_manifest_json catalog_path: _path_to_catalog_json - test_results_path: _path_to_run_results_json + run_results_paths: + - _path_to_run_results_json target_platform: postgres entities_enabled: test_results: Only @@ -233,7 +234,8 @@ source: config: manifest_path: _path_to_manifest_json catalog_path: _path_to_catalog_json - run_results_path: _path_to_run_results_json + run_results_paths: + - _path_to_run_results_json target_platform: postgres entities_enabled: test_results: No diff --git a/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml b/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml index 251aba44db387..e6949af4cf6ff 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml +++ b/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml @@ -6,7 +6,8 @@ source: manifest_path: "${DBT_PROJECT_ROOT}/target/manifest_file.json" catalog_path: "${DBT_PROJECT_ROOT}/target/catalog_file.json" sources_path: "${DBT_PROJECT_ROOT}/target/sources_file.json" # optional for freshness - test_results_path: "${DBT_PROJECT_ROOT}/target/run_results.json" # optional for recording dbt test results after running dbt test + run_results_paths: + - "${DBT_PROJECT_ROOT}/target/run_results.json" # optional for recording dbt test results after running dbt test # Options target_platform: "my_target_platform_id" # e.g. bigquery/postgres/etc. 
diff --git a/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml b/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml index 07c08a814a7b1..68cf869c0106f 100644 --- a/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml +++ b/metadata-ingestion/docs/sources/openapi/openapi_recipe.yml @@ -4,11 +4,21 @@ source: name: test_endpoint # this name will appear in DatHub url: https://test_endpoint.com/ swagger_file: classicapi/doc/swagger.json # where to search for the OpenApi definitions - get_token: # optional, if you need to get an authentication token beforehand + + # option 1: bearer token + bearer_token: "" + + # option 2: dynamically generated tokens, username/password are mandatory + get_token: request_type: get - url: api/authentication/login?username={username}&password={password} - username: your_username # optional - password: your_password # optional + url_complement: api/authentication/login?username={username}&password={password} + username: your_username + password: your_password + + # option 3: using basic auth + username: your_username + password: your_password + forced_examples: # optionals /accounts/groupname/{name}: ['test'] /accounts/username/{name}: ['test'] diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 8b778048c3475..bf80172441405 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -101,7 +101,7 @@ sqlglot_lib = { # Using an Acryl fork of sqlglot. 
# https://github.com/tobymao/sqlglot/compare/main...hsheth2:sqlglot:main?expand=1 - "acryl-sqlglot[rs]==25.20.2.dev5", + "acryl-sqlglot[rs]==25.20.2.dev6", } classification_lib = { diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index 44fd32d5a426b..5b188edf9563b 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -94,60 +94,59 @@ def urn_must_be_present(cls, v, values): return v @staticmethod - def create(file: str) -> None: - emitter: DataHubGraph - - with get_default_graph() as emitter: - with open(file) as fp: - structuredproperties: List[dict] = yaml.safe_load(fp) - for structuredproperty_raw in structuredproperties: - structuredproperty = StructuredProperties.parse_obj( - structuredproperty_raw + def create(file: str, graph: Optional[DataHubGraph] = None) -> None: + emitter: DataHubGraph = graph if graph else get_default_graph() + + with open(file) as fp: + structuredproperties: List[dict] = yaml.safe_load(fp) + for structuredproperty_raw in structuredproperties: + structuredproperty = StructuredProperties.parse_obj( + structuredproperty_raw + ) + if not structuredproperty.type.islower(): + structuredproperty.type = structuredproperty.type.lower() + logger.warn( + f"Structured property type should be lowercase. Updated to {structuredproperty.type}" ) - if not structuredproperty.type.islower(): - structuredproperty.type = structuredproperty.type.lower() - logger.warn( - f"Structured property type should be lowercase. Updated to {structuredproperty.type}" - ) - if not AllowedTypes.check_allowed_type(structuredproperty.type): - raise ValueError( - f"Type {structuredproperty.type} is not allowed. 
Allowed types are {AllowedTypes.values()}" - ) - mcp = MetadataChangeProposalWrapper( - entityUrn=structuredproperty.urn, - aspect=StructuredPropertyDefinitionClass( - qualifiedName=structuredproperty.fqn, - valueType=Urn.make_data_type_urn(structuredproperty.type), - displayName=structuredproperty.display_name, - description=structuredproperty.description, - entityTypes=[ - Urn.make_entity_type_urn(entity_type) - for entity_type in structuredproperty.entity_types or [] - ], - cardinality=structuredproperty.cardinality, - immutable=structuredproperty.immutable, - allowedValues=( - [ - PropertyValueClass( - value=v.value, description=v.description - ) - for v in structuredproperty.allowed_values - ] - if structuredproperty.allowed_values - else None - ), - typeQualifier=( - { - "allowedTypes": structuredproperty.type_qualifier.allowed_types - } - if structuredproperty.type_qualifier - else None - ), - ), + if not AllowedTypes.check_allowed_type(structuredproperty.type): + raise ValueError( + f"Type {structuredproperty.type} is not allowed. 
Allowed types are {AllowedTypes.values()}" ) - emitter.emit_mcp(mcp) + mcp = MetadataChangeProposalWrapper( + entityUrn=structuredproperty.urn, + aspect=StructuredPropertyDefinitionClass( + qualifiedName=structuredproperty.fqn, + valueType=Urn.make_data_type_urn(structuredproperty.type), + displayName=structuredproperty.display_name, + description=structuredproperty.description, + entityTypes=[ + Urn.make_entity_type_urn(entity_type) + for entity_type in structuredproperty.entity_types or [] + ], + cardinality=structuredproperty.cardinality, + immutable=structuredproperty.immutable, + allowedValues=( + [ + PropertyValueClass( + value=v.value, description=v.description + ) + for v in structuredproperty.allowed_values + ] + if structuredproperty.allowed_values + else None + ), + typeQualifier=( + { + "allowedTypes": structuredproperty.type_qualifier.allowed_types + } + if structuredproperty.type_qualifier + else None + ), + ), + ) + emitter.emit_mcp(mcp) - logger.info(f"Created structured property {structuredproperty.urn}") + logger.info(f"Created structured property {structuredproperty.urn}") @classmethod def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index 63e05aa65d9a5..08672528abb5d 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -9,7 +9,7 @@ from termcolor import colored from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key -from datahub.ingestion.graph.client import get_default_graph +from datahub.ingestion.graph.client import DataHubGraph, get_default_graph from datahub.telemetry import telemetry from datahub.upgrade import upgrade from datahub.utilities.urns.urn import Urn @@ -62,8 +62,9 @@ def get_timeline( start_time: Optional[int], end_time: Optional[int], diff: bool, + graph: Optional[DataHubGraph] = 
None, ) -> Any: - client = get_default_graph() + client = graph if graph else get_default_graph() session = client._session host = client.config.server if urn.startswith("urn%3A"): diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 7160aa6fc339d..ad12447532335 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -4,14 +4,11 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigModel +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import FabricTypeClass DEFAULT_ENV = FabricTypeClass.PROD - -# Get all the constants from the FabricTypeClass. It's not an enum, so this is a bit hacky but works. -ALL_ENV_TYPES: Set[str] = { - value for name, value in vars(FabricTypeClass).items() if not name.startswith("_") -} +ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass)) class PlatformInstanceConfigMixin(ConfigModel): diff --git a/metadata-ingestion/src/datahub/emitter/enum_helpers.py b/metadata-ingestion/src/datahub/emitter/enum_helpers.py new file mode 100644 index 0000000000000..89949ab3717ff --- /dev/null +++ b/metadata-ingestion/src/datahub/emitter/enum_helpers.py @@ -0,0 +1,11 @@ +from typing import List, Type + + +def get_enum_options(_class: Type[object]) -> List[str]: + """Get the valid values for an enum in the datahub.metadata.schema_classes module.""" + + return [ + value + for name, value in vars(_class).items() + if not callable(value) and not name.startswith("_") + ] diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index e273bab62fe7a..d3a930d988171 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -11,7 +11,6 @@ from typing import ( 
TYPE_CHECKING, Any, - Iterable, List, Optional, Tuple, @@ -25,7 +24,8 @@ import typing_inspect from avrogen.dict_wrapper import DictWrapper -from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION +from datahub.configuration.source_common import DEFAULT_ENV +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import ( AssertionKeyClass, AuditStampClass, @@ -50,15 +50,12 @@ UpstreamLineageClass, _Aspect as AspectAbstract, ) +from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn from datahub.utilities.urn_encoder import UrnEncoder -from datahub.utilities.urns.data_flow_urn import DataFlowUrn -from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub.utilities.urns.tag_urn import TagUrn logger = logging.getLogger(__name__) Aspect = TypeVar("Aspect", bound=AspectAbstract) -DEFAULT_ENV = DEFAULT_ENV_CONFIGURATION DEFAULT_FLOW_CLUSTER = "prod" UNKNOWN_USER = "urn:li:corpuser:unknown" DATASET_URN_TO_LOWER: bool = ( @@ -374,19 +371,11 @@ def make_ml_model_group_urn(platform: str, group_name: str, env: str) -> str: ) -def _get_enum_options(_class: Type[object]) -> Iterable[str]: - return [ - f - for f in dir(_class) - if not callable(getattr(_class, f)) and not f.startswith("_") - ] - - def validate_ownership_type(ownership_type: str) -> Tuple[str, Optional[str]]: if ownership_type.startswith("urn:li:"): return OwnershipTypeClass.CUSTOM, ownership_type ownership_type = ownership_type.upper() - if ownership_type in _get_enum_options(OwnershipTypeClass): + if ownership_type in get_enum_options(OwnershipTypeClass): return ownership_type, None raise ValueError(f"Unexpected ownership type: {ownership_type}") diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index cc2a1dc7a7322..b7fb1fd56891c 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ 
b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -3,6 +3,8 @@ from pydantic.fields import Field from pydantic.main import BaseModel +from datahub.cli.env_utils import get_boolean_env_variable +from datahub.emitter.enum_helpers import get_enum_options from datahub.emitter.mce_builder import ( Aspect, datahub_guid, @@ -34,6 +36,16 @@ TagAssociationClass, ) +# In https://github.com/datahub-project/datahub/pull/11214, we added a +# new env field to container properties. However, populating this field +# with servers older than 0.14.1 will cause errors. This environment +# variable is an escape hatch to avoid this compatibility issue. +# TODO: Once the model change has been deployed for a while, we can remove this. +# Probably can do it at the beginning of 2025. +_INCLUDE_ENV_IN_CONTAINER_PROPERTIES = get_boolean_env_variable( + "DATAHUB_INCLUDE_ENV_IN_CONTAINER_PROPERTIES", default=True +) + class DatahubKey(BaseModel): def guid_dict(self) -> Dict[str, str]: @@ -191,16 +203,18 @@ def gen_containers( created: Optional[int] = None, last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: - # because of backwards compatibility with a past issue, container_key.env may be a valid env or an instance name + # Extra validation on the env field. + # In certain cases (mainly for backwards compatibility), the env field will actually + # have a platform instance name. 
env = ( container_key.env - if container_key.env in vars(FabricTypeClass).values() + if container_key.env in get_enum_options(FabricTypeClass) else None ) + container_urn = container_key.as_urn() yield MetadataChangeProposalWrapper( entityUrn=f"{container_urn}", - # entityKeyAspect=ContainerKeyClass(guid=parent_container_key.guid()), aspect=ContainerProperties( name=name, description=description, @@ -214,7 +228,7 @@ def gen_containers( lastModified=( TimeStamp(time=last_modified) if last_modified is not None else None ), - env=env if env is not None else None, + env=env if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES else None, ), ).as_workunit() diff --git a/smoke-test/requests_wrapper/constants.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py similarity index 100% rename from smoke-test/requests_wrapper/constants.py rename to metadata-ingestion/src/datahub/ingestion/api/auto_work_units/__init__.py diff --git a/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py new file mode 100644 index 0000000000000..fc164c8479365 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py @@ -0,0 +1,133 @@ +import dataclasses +import json +from typing import Dict, Iterable, Optional + +from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + ChangeTypeClass, + DatasetPropertiesClass, + GenericAspectClass, + MetadataChangeProposalClass, + OperationClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder +from datahub.utilities.urns.urn import guess_entity_type + + +@dataclasses.dataclass +class TimestampPair: + last_modified_dataset_props: Optional[ + TimeStampClass + ] # last_modified of datasetProperties aspect + last_updated_timestamp_dataset_props: Optional[ + int + ] # 
lastUpdatedTimestamp of the operation aspect + + +def try_aspect_from_metadata_change_proposal_class( + wu: MetadataWorkUnit, +) -> Optional[DatasetPropertiesClass]: + if ( + isinstance(wu.metadata, MetadataChangeProposalClass) + and wu.metadata.aspectName == "datasetProperties" + and wu.metadata.changeType == ChangeTypeClass.PATCH + and isinstance(wu.metadata.aspect, GenericAspectClass) + ): + patch_dataset_properties = json.loads(wu.metadata.aspect.value) + for operation in patch_dataset_properties: + if operation.get("path") == "/lastModified": + # Deserializing `lastModified` as the `auto_patch_last_modified` function relies on this property + # to decide if a patch aspect for the datasetProperties aspect should be generated + return DatasetPropertiesClass( + lastModified=TimeStampClass(time=operation["value"]["time"]) + ) + + return None + + +def auto_patch_last_modified( + stream: Iterable[MetadataWorkUnit], +) -> Iterable[MetadataWorkUnit]: + """ + Generate a patch request for datasetProperties aspect in-case + 1. `lastModified` of datasetProperties is not set + 2. And there are operation aspects + in this case set the `lastModified` of datasetProperties to max value of operation aspects `lastUpdatedTimestamp`. + + We need this functionality to support sort by `last modified` on UI. 
+ """ + candidate_dataset_for_patch: Dict[str, TimestampPair] = {} + + for wu in stream: + if ( + guess_entity_type(wu.get_urn()) != "dataset" + ): # we are only processing datasets + yield wu + continue + + dataset_properties_aspect = wu.get_aspect_of_type( + DatasetPropertiesClass + ) or try_aspect_from_metadata_change_proposal_class(wu) + dataset_operation_aspect = wu.get_aspect_of_type(OperationClass) + + timestamp_pair = candidate_dataset_for_patch.get(wu.get_urn()) + + if timestamp_pair: + # Update the timestamp_pair + if dataset_properties_aspect and dataset_properties_aspect.lastModified: + timestamp_pair.last_modified_dataset_props = ( + dataset_properties_aspect.lastModified + ) + + if ( + dataset_operation_aspect + and dataset_operation_aspect.lastUpdatedTimestamp + ): + timestamp_pair.last_updated_timestamp_dataset_props = max( + timestamp_pair.last_updated_timestamp_dataset_props or 0, + dataset_operation_aspect.lastUpdatedTimestamp, + ) + + else: + # Create new TimestampPair + last_modified_dataset_props: Optional[TimeStampClass] = None + last_updated_timestamp_dataset_props: Optional[int] = None + + if dataset_properties_aspect: + last_modified_dataset_props = dataset_properties_aspect.lastModified + + if dataset_operation_aspect: + last_updated_timestamp_dataset_props = ( + dataset_operation_aspect.lastUpdatedTimestamp + ) + + candidate_dataset_for_patch[wu.get_urn()] = TimestampPair( + last_modified_dataset_props=last_modified_dataset_props, + last_updated_timestamp_dataset_props=last_updated_timestamp_dataset_props, + ) + + yield wu + + # Emit a patch datasetProperties aspect for dataset where last_modified is None + for entity_urn, timestamp_pair in candidate_dataset_for_patch.items(): + # Emit patch if last_modified is not set and last_updated_timestamp is set + if ( + timestamp_pair.last_modified_dataset_props is None + and timestamp_pair.last_updated_timestamp_dataset_props + ): + dataset_patch_builder = DatasetPatchBuilder(urn=entity_urn) + + 
dataset_patch_builder.set_last_modified( + timestamp=TimeStampClass( + time=timestamp_pair.last_updated_timestamp_dataset_props + ) + ) + + yield from [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(mcp), + mcp_raw=mcp, + ) + for mcp in dataset_patch_builder.build() + ] diff --git a/metadata-ingestion/src/datahub/ingestion/api/source.py b/metadata-ingestion/src/datahub/ingestion/api/source.py index 3dea3d36f41f1..85ae17ddf6529 100644 --- a/metadata-ingestion/src/datahub/ingestion/api/source.py +++ b/metadata-ingestion/src/datahub/ingestion/api/source.py @@ -28,6 +28,9 @@ from datahub.configuration.common import ConfigModel from datahub.configuration.source_common import PlatformInstanceConfigMixin from datahub.emitter.mcp_builder import mcps_from_mce +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.closeable import Closeable from datahub.ingestion.api.common import PipelineContext, RecordEnvelope, WorkUnit from datahub.ingestion.api.report import Report @@ -443,6 +446,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]: ), browse_path_processor, partial(auto_workunit_reporter, self.get_report()), + auto_patch_last_modified, ] @staticmethod diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index b333bcf695a46..7f64055f505f4 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -12,7 +12,7 @@ from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.ingestion.source_report.time_window import BaseTimeWindowReport from datahub.sql_parsing.sql_parsing_aggregator import SqlAggregatorReport -from datahub.utilities.lossy_collections import LossyDict, 
LossyList +from datahub.utilities.lossy_collections import LossyDict, LossyList, LossySet from datahub.utilities.perf_timer import PerfTimer from datahub.utilities.stats_collections import TopKDict, int_top_k_dict @@ -69,6 +69,9 @@ class BigQueryQueriesExtractorReport(Report): num_total_queries: int = 0 num_unique_queries: int = 0 + num_discovered_tables: Optional[int] = None + inferred_temp_tables: LossySet[str] = field(default_factory=LossySet) + @dataclass class BigQueryV2Report( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py index 7c1abe2ce3569..dd683559a007b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema.py @@ -105,6 +105,7 @@ class BigqueryTable(BaseTable): long_term_billable_bytes: Optional[int] = None partition_info: Optional[PartitionInfo] = None columns_ignore_from_profiling: List[str] = field(default_factory=list) + external: bool = False @dataclass @@ -252,7 +253,16 @@ def get_datasets_for_project_id( self.report.num_list_datasets_api_requests += 1 datasets = self.bq_client.list_datasets(project_id, max_results=maxResults) return [ - BigqueryDataset(name=d.dataset_id, labels=d.labels) for d in datasets + BigqueryDataset( + name=d.dataset_id, + labels=d.labels, + location=( + d._properties.get("location") + if hasattr(d, "_properties") and isinstance(d._properties, dict) + else None + ), + ) + for d in datasets ] # This is not used anywhere @@ -295,12 +305,12 @@ def get_tables_for_dataset( dataset_name: str, tables: Dict[str, TableListItem], report: BigQueryV2Report, - with_data_read_permission: bool = False, + with_partitions: bool = False, ) -> Iterator[BigqueryTable]: with PerfTimer() as current_timer: filter_clause: str = ", ".join(f"'{table}'" for table in tables.keys()) - if 
with_data_read_permission: + if with_partitions: query_template = BigqueryQuery.tables_for_dataset else: query_template = BigqueryQuery.tables_for_dataset_without_partition_data @@ -374,6 +384,7 @@ def _make_bigquery_table( num_partitions=table.get("num_partitions"), active_billable_bytes=table.get("active_billable_bytes"), long_term_billable_bytes=table.get("long_term_billable_bytes"), + external=(table.table_type == BigqueryTableType.EXTERNAL), ) def get_views_for_dataset( diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py index 489773c5745ff..6ea8f21e8b291 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py @@ -242,7 +242,11 @@ def gen_project_id_containers(self, database: str) -> Iterable[MetadataWorkUnit] ) def gen_dataset_containers( - self, dataset: str, project_id: str, tags: Optional[Dict[str, str]] = None + self, + dataset: str, + project_id: str, + tags: Optional[Dict[str, str]] = None, + extra_properties: Optional[Dict[str, str]] = None, ) -> Iterable[MetadataWorkUnit]: schema_container_key = self.gen_dataset_key(project_id, dataset) @@ -272,6 +276,7 @@ def gen_dataset_containers( else None ), tags=tags_joined, + extra_properties=extra_properties, ) def _process_project( @@ -400,7 +405,14 @@ def _process_schema( if self.config.include_schema_metadata: yield from self.gen_dataset_containers( - dataset_name, project_id, bigquery_dataset.labels + dataset_name, + project_id, + bigquery_dataset.labels, + ( + {"location": bigquery_dataset.location} + if bigquery_dataset.location + else None + ), ) columns = None @@ -445,7 +457,7 @@ def _process_schema( if self.config.include_tables: db_tables[dataset_name] = list( - self.get_tables_for_dataset(project_id, dataset_name) + 
self.get_tables_for_dataset(project_id, bigquery_dataset) ) for table in db_tables[dataset_name]: @@ -686,7 +698,9 @@ def gen_table_dataset_workunits( if table.max_shard_id: custom_properties["max_shard_id"] = str(table.max_shard_id) custom_properties["is_sharded"] = str(True) - sub_types = ["sharded table"] + sub_types + sub_types = [DatasetSubTypes.SHARDED_TABLE] + sub_types + if table.external: + sub_types = [DatasetSubTypes.EXTERNAL_TABLE] + sub_types tags_to_add = None if table.labels and self.config.capture_table_label_as_tag: @@ -971,25 +985,36 @@ def gen_schema_metadata( def get_tables_for_dataset( self, project_id: str, - dataset_name: str, + dataset: BigqueryDataset, ) -> Iterable[BigqueryTable]: # In bigquery there is no way to query all tables in a Project id with PerfTimer() as timer: + + # PARTITIONS INFORMATION_SCHEMA view is not available for BigLake tables + # based on Amazon S3 and Blob Storage data. + # https://cloud.google.com/bigquery/docs/omni-introduction#limitations + # Omni Locations - https://cloud.google.com/bigquery/docs/omni-introduction#locations + with_partitions = self.config.have_table_data_read_permission and not ( + dataset.location + and dataset.location.lower().startswith(("aws-", "azure-")) + ) + # Partitions view throw exception if we try to query partition info for too many tables # so we have to limit the number of tables we query partition info. # The conn.list_tables returns table infos that information_schema doesn't contain and this # way we can merge that info with the queried one. 
# https://cloud.google.com/bigquery/docs/information-schema-partitions - max_batch_size: int = ( - self.config.number_of_datasets_process_in_batch - if not self.config.have_table_data_read_permission - else self.config.number_of_datasets_process_in_batch_if_profiling_enabled - ) + if with_partitions: + max_batch_size = ( + self.config.number_of_datasets_process_in_batch_if_profiling_enabled + ) + else: + max_batch_size = self.config.number_of_datasets_process_in_batch # We get the list of tables in the dataset to get core table properties and to be able to process the tables in batches # We collect only the latest shards from sharded tables (tables with _YYYYMMDD suffix) and ignore temporary tables table_items = self.get_core_table_details( - dataset_name, project_id, self.config.temp_table_dataset_prefix + dataset.name, project_id, self.config.temp_table_dataset_prefix ) items_to_get: Dict[str, TableListItem] = {} @@ -998,9 +1023,9 @@ def get_tables_for_dataset( if len(items_to_get) % max_batch_size == 0: yield from self.schema_api.get_tables_for_dataset( project_id, - dataset_name, + dataset.name, items_to_get, - with_data_read_permission=self.config.have_table_data_read_permission, + with_partitions=with_partitions, report=self.report, ) items_to_get.clear() @@ -1008,13 +1033,13 @@ def get_tables_for_dataset( if items_to_get: yield from self.schema_api.get_tables_for_dataset( project_id, - dataset_name, + dataset.name, items_to_get, - with_data_read_permission=self.config.have_table_data_read_permission, + with_partitions=with_partitions, report=self.report, ) - self.report.metadata_extraction_sec[f"{project_id}.{dataset_name}"] = round( + self.report.metadata_extraction_sec[f"{project_id}.{dataset.name}"] = round( timer.elapsed_seconds(), 2 ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py index 
27beb7b0254c4..fe64eeeb84139 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py @@ -113,7 +113,7 @@ def metadata_read_capability_test( project_id=project_id, dataset_name=result[0].name, tables={}, - with_data_read_permission=config.have_table_data_read_permission, + with_partitions=config.have_table_data_read_permission, report=BigQueryV2Report(), ) if len(list(tables)) == 0: diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py index 582c312f99098..6af8166fbf70c 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py @@ -173,6 +173,13 @@ def get_workunits( f"^{normalized_table_name}.{column}$" ) + if table.external and not self.config.profiling.profile_external_tables: + self.report.profiling_skipped_other[f"{project_id}.{dataset}"] += 1 + logger.info( + f"Skipping profiling of external table {project_id}.{dataset}.{table.name}" + ) + continue + # Emit the profile work unit logger.debug( f"Creating profile request for table {normalized_table_name}" diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py index a826f09b9a7c8..d57ec655b1f88 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/queries_extractor.py @@ -173,6 +173,9 @@ def __init__( format_queries=False, ) self.report.sql_aggregator = self.aggregator.report + self.report.num_discovered_tables = ( + len(self.discovered_tables) if self.discovered_tables else None + ) @functools.cached_property def local_temp_path(self) -> 
pathlib.Path: @@ -201,6 +204,7 @@ def is_temp_table(self, name: str) -> bool: and self.discovered_tables and str(BigQueryTableRef(table)) not in self.discovered_tables ): + self.report.inferred_temp_tables.add(name) return True except Exception: @@ -264,6 +268,8 @@ def get_workunits_internal( for query in query_instances.values(): if i > 0 and i % 10000 == 0: logger.info(f"Added {i} query log entries to SQL aggregator") + if self.report.sql_aggregator: + logger.info(self.report.sql_aggregator.as_string()) self.aggregator.add(query) i += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py index 4bc120fbecf8f..86c1c8db11b05 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py @@ -17,6 +17,8 @@ class DatasetSubTypes(StrEnum): POWERBI_DATASET_TABLE = "PowerBI Dataset Table" QLIK_DATASET = "Qlik Dataset" BIGQUERY_TABLE_SNAPSHOT = "Bigquery Table Snapshot" + SHARDED_TABLE = "Sharded Table" + EXTERNAL_TABLE = "External Table" SIGMA_DATASET = "Sigma Dataset" SAC_MODEL = "Model" SAC_IMPORT_DATA_MODEL = "Import Data Model" diff --git a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py index 12812aad441f2..c15f1deb43a3a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py @@ -5,7 +5,7 @@ from dataclasses import dataclass, field from datetime import datetime from enum import auto -from typing import Any, Dict, Iterable, List, Optional, Tuple +from typing import Any, Dict, Iterable, List, Optional, Set, Tuple import more_itertools import pydantic @@ -46,6 +46,7 @@ from datahub.ingestion.api.source import MetadataWorkUnitProcessor from datahub.ingestion.api.source_helpers import auto_workunit from 
datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.source.dbt.dbt_tests import ( DBTTest, DBTTestResult, @@ -233,6 +234,10 @@ def can_emit_node_type(self, node_type: str) -> bool: return allowed == EmitDirective.YES + @property + def can_emit_test_definitions(self) -> bool: + return self.test_definitions == EmitDirective.YES + @property def can_emit_test_results(self) -> bool: return self.test_results == EmitDirective.YES @@ -415,15 +420,7 @@ def meta_mapping_validator( if v["operation"] == "add_owner": owner_category = v["config"].get("owner_category") if owner_category: - allowed_categories = [ - value - for name, value in vars(OwnershipTypeClass).items() - if not name.startswith("_") - ] - if (owner_category.upper()) not in allowed_categories: - raise ValueError( - f"Owner category {owner_category} is not one of {allowed_categories}" - ) + mce_builder.validate_ownership_type(owner_category) return meta_mapping @validator("include_column_lineage") @@ -743,8 +740,8 @@ def get_upstreams_for_test( all_nodes_map: Dict[str, DBTNode], platform_instance: Optional[str], environment: str, -) -> List[str]: - upstream_urns = [] +) -> Dict[str, str]: + upstreams = {} for upstream in test_node.upstream_nodes: if upstream not in all_nodes_map: @@ -755,15 +752,13 @@ def get_upstreams_for_test( upstream_manifest_node = all_nodes_map[upstream] - upstream_urns.append( - upstream_manifest_node.get_urn( - target_platform=DBT_PLATFORM, - data_platform_instance=platform_instance, - env=environment, - ) + upstreams[upstream] = upstream_manifest_node.get_urn( + target_platform=DBT_PLATFORM, + data_platform_instance=platform_instance, + env=environment, ) - return upstream_urns + return upstreams def make_mapping_upstream_lineage( @@ -900,40 +895,11 @@ def __init__(self, config: DBTCommonConfig, ctx: PipelineContext, platform: str) def create_test_entity_mcps( self, test_nodes: List[DBTNode], - 
custom_props: Dict[str, str], + extra_custom_props: Dict[str, str], all_nodes_map: Dict[str, DBTNode], ) -> Iterable[MetadataWorkUnit]: for node in sorted(test_nodes, key=lambda n: n.dbt_name): - assertion_urn = mce_builder.make_assertion_urn( - mce_builder.datahub_guid( - { - k: v - for k, v in { - "platform": DBT_PLATFORM, - "name": node.dbt_name, - "instance": self.config.platform_instance, - **( - # Ideally we'd include the env unconditionally. However, we started out - # not including env in the guid, so we need to maintain backwards compatibility - # with existing PROD assertions. - {"env": self.config.env} - if self.config.env != mce_builder.DEFAULT_ENV - and self.config.include_env_in_assertion_guid - else {} - ), - }.items() - if v is not None - } - ) - ) - - if self.config.entities_enabled.can_emit_node_type("test"): - yield MetadataChangeProposalWrapper( - entityUrn=assertion_urn, - aspect=self._make_data_platform_instance_aspect(), - ).as_workunit() - - upstream_urns = get_upstreams_for_test( + upstreams = get_upstreams_for_test( test_node=node, all_nodes_map=all_nodes_map, platform_instance=self.config.platform_instance, @@ -941,12 +907,51 @@ def create_test_entity_mcps( ) # In case a dbt test depends on multiple tables, we create separate assertions for each. - # TODO: This logic doesn't actually work properly, since we're reusing the same assertion_urn - # across multiple upstream tables, so we're actually only creating one assertion and the last - # upstream_urn gets used. Luckily, most dbt tests are associated with a single table, so this - # doesn't cause major issues in practice. - for upstream_urn in sorted(upstream_urns): - if self.config.entities_enabled.can_emit_node_type("test"): + for upstream_node_name, upstream_urn in upstreams.items(): + guid_upstream_part = {} + if len(upstreams) > 1: + # If we depend on multiple upstreams, we need to generate a unique guid for each assertion. 
+ # If there was only one upstream, we want to maintain the original assertion for backwards compatibility. + guid_upstream_part = { + "on_dbt_upstream": upstream_node_name, + } + + assertion_urn = mce_builder.make_assertion_urn( + mce_builder.datahub_guid( + { + k: v + for k, v in { + "platform": DBT_PLATFORM, + "name": node.dbt_name, + "instance": self.config.platform_instance, + **( + # Ideally we'd include the env unconditionally. However, we started out + # not including env in the guid, so we need to maintain backwards compatibility + # with existing PROD assertions. + {"env": self.config.env} + if self.config.env != mce_builder.DEFAULT_ENV + and self.config.include_env_in_assertion_guid + else {} + ), + **guid_upstream_part, + }.items() + if v is not None + } + ) + ) + + custom_props = { + "dbt_unique_id": node.dbt_name, + "dbt_test_upstream_unique_id": upstream_node_name, + **extra_custom_props, + } + + if self.config.entities_enabled.can_emit_test_definitions: + yield MetadataChangeProposalWrapper( + entityUrn=assertion_urn, + aspect=self._make_data_platform_instance_aspect(), + ).as_workunit() + yield make_assertion_from_test( custom_props, node, @@ -1032,12 +1037,15 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: all_nodes_map, ) + def _is_allowed_node(self, key: str) -> bool: + return self.config.node_name_pattern.allowed(key) + def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: nodes = [] for node in all_nodes: key = node.dbt_name - if not self.config.node_name_pattern.allowed(key): + if not self._is_allowed_node(key): self.report.nodes_filtered.append(key) continue @@ -1049,6 +1057,36 @@ def _filter_nodes(self, all_nodes: List[DBTNode]) -> List[DBTNode]: def _to_schema_info(schema_fields: List[SchemaField]) -> SchemaInfo: return {column.fieldPath: column.nativeDataType for column in schema_fields} + def _determine_cll_required_nodes( + self, all_nodes_map: Dict[str, DBTNode] + ) -> Tuple[Set[str], Set[str]]: + # 
Based on the filter patterns, we only need to do schema inference and CLL + # for a subset of nodes. + # If a node depends on an ephemeral model, the ephemeral model should also be in the CLL list. + # Invariant: If it's in the CLL list, it will also be in the schema list. + # Invariant: The upstream of any node in the CLL list will be in the schema list. + schema_nodes: Set[str] = set() + cll_nodes: Set[str] = set() + + def add_node_to_cll_list(dbt_name: str) -> None: + if dbt_name in cll_nodes: + return + for upstream in all_nodes_map[dbt_name].upstream_nodes: + schema_nodes.add(upstream) + + upstream_node = all_nodes_map[upstream] + if upstream_node.is_ephemeral_model(): + add_node_to_cll_list(upstream) + + cll_nodes.add(dbt_name) + schema_nodes.add(dbt_name) + + for dbt_name in all_nodes_map.keys(): + if self._is_allowed_node(dbt_name): + add_node_to_cll_list(dbt_name) + + return schema_nodes, cll_nodes + def _infer_schemas_and_update_cll( # noqa: C901 self, all_nodes_map: Dict[str, DBTNode] ) -> None: @@ -1075,7 +1113,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 ) return - graph = self.ctx.graph + graph: Optional[DataHubGraph] = self.ctx.graph schema_resolver = SchemaResolver( platform=self.config.target_platform, @@ -1087,7 +1125,7 @@ def _infer_schemas_and_update_cll( # noqa: C901 # Iterate over the dbt nodes in topological order. # This ensures that we process upstream nodes before downstream nodes. 
- node_order = topological_sort( + all_node_order = topological_sort( list(all_nodes_map.keys()), edges=list( (upstream, node.dbt_name) @@ -1096,7 +1134,17 @@ def _infer_schemas_and_update_cll( # noqa: C901 if upstream in all_nodes_map ), ) - for dbt_name in node_order: + schema_required_nodes, cll_required_nodes = self._determine_cll_required_nodes( + all_nodes_map + ) + + for dbt_name in all_node_order: + if dbt_name not in schema_required_nodes: + logger.debug( + f"Skipping {dbt_name} because it is filtered out by patterns" + ) + continue + node = all_nodes_map[dbt_name] logger.debug(f"Processing CLL/schemas for {node.dbt_name}") @@ -1171,6 +1219,10 @@ def _infer_schemas_and_update_cll( # noqa: C901 # For sources, we generate CLL as a 1:1 mapping. # We don't support CLL for tests (assertions) or seeds. pass + elif node.dbt_name not in cll_required_nodes: + logger.debug( + f"Not generating CLL for {node.dbt_name} because we don't need it." + ) elif node.compiled_code: # Add CTE stops based on the upstreams list. cte_mapping = { diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py index 71d497c56f13e..e593e132dafd7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py @@ -13,7 +13,6 @@ Set, Tuple, Union, - cast, ) from looker_sdk.error import SDKError @@ -164,28 +163,6 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext): # The list values are used purely for debugging purposes. 
self.reachable_explores: Dict[Tuple[str, str], List[str]] = {} - # Keep stat generators to generate entity stat aspect later - stat_generator_config: looker_usage.StatGeneratorConfig = ( - looker_usage.StatGeneratorConfig( - looker_api_wrapper=self.looker_api, - looker_user_registry=self.user_registry, - interval=self.source_config.extract_usage_history_for_interval, - strip_user_ids_from_email=self.source_config.strip_user_ids_from_email, - platform_name=self.source_config.platform_name, - max_threads=self.source_config.max_threads, - ) - ) - - self.dashboard_stat_generator = looker_usage.create_stat_entity_generator( - looker_usage.SupportedStatEntity.DASHBOARD, - config=stat_generator_config, - ) - - self.chart_stat_generator = looker_usage.create_stat_entity_generator( - looker_usage.SupportedStatEntity.CHART, - config=stat_generator_config, - ) - # To keep track of folders (containers) which have already been ingested # Required, as we do not ingest all folders but only those that have dashboards/looks self.processed_folders: List[str] = [] @@ -380,7 +357,7 @@ def _get_input_fields_from_query( ) ) - # A query uses fields for filtering and those fields are defined in views, find the views those fields use + # A query uses fields for filtering, and those fields are defined in views, find the views those fields use filters: MutableMapping[str, Any] = ( query.filters if query.filters is not None else {} ) @@ -388,7 +365,8 @@ def _get_input_fields_from_query( if field is None: continue - # we haven't loaded in metadata about the explore yet, so we need to wait until explores are populated later to fetch this + # we haven't loaded in metadata about the explore yet, so we need to wait until explores are populated + # later to fetch this result.append( InputFieldElement( name=field, view_field=None, model=query.model, explore=query.view @@ -647,9 +625,7 @@ def _create_platform_instance_aspect( ) def _make_chart_urn(self, element_id: str) -> str: - platform_instance: 
Optional[str] = None - if self.source_config.include_platform_instance_in_urns: platform_instance = self.source_config.platform_instance @@ -871,18 +847,21 @@ def _make_dashboard_metadata_events( return proposals - def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str: + def _make_dashboard_urn(self, looker_dashboard_name_part: str) -> str: + # Note that `looker_dashboard_name_part` will like be `dashboard.1234`. platform_instance: Optional[str] = None - if self.source_config.include_platform_instance_in_urns: platform_instance = self.source_config.platform_instance return builder.make_dashboard_urn( - name=looker_dashboard.get_urn_dashboard_id(), + name=looker_dashboard_name_part, platform=self.source_config.platform_name, platform_instance=platform_instance, ) + def make_dashboard_urn(self, looker_dashboard: LookerDashboard) -> str: + return self._make_dashboard_urn(looker_dashboard.get_urn_dashboard_id()) + def _make_explore_metadata_events( self, ) -> Iterable[ @@ -1396,7 +1375,6 @@ def _get_folder_and_ancestors_workunits( def extract_usage_stat( self, looker_dashboards: List[looker_usage.LookerDashboardForUsage] ) -> List[MetadataChangeProposalWrapper]: - mcps: List[MetadataChangeProposalWrapper] = [] looks: List[looker_usage.LookerChartForUsage] = [] # filter out look from all dashboard for dashboard in looker_dashboards: @@ -1407,16 +1385,33 @@ def extract_usage_stat( # dedup looks looks = list({str(look.id): look for look in looks}.values()) - usage_stat_generators = [ - self.dashboard_stat_generator( - cast(List[looker_usage.ModelForUsage], looker_dashboards), self.reporter - ), - self.chart_stat_generator( - cast(List[looker_usage.ModelForUsage], looks), self.reporter - ), - ] + # Keep stat generators to generate entity stat aspect later + stat_generator_config: looker_usage.StatGeneratorConfig = ( + looker_usage.StatGeneratorConfig( + looker_api_wrapper=self.looker_api, + looker_user_registry=self.user_registry, + 
interval=self.source_config.extract_usage_history_for_interval, + strip_user_ids_from_email=self.source_config.strip_user_ids_from_email, + max_threads=self.source_config.max_threads, + ) + ) + + dashboard_usage_generator = looker_usage.create_dashboard_stat_generator( + stat_generator_config, + self.reporter, + self._make_dashboard_urn, + looker_dashboards, + ) + + chart_usage_generator = looker_usage.create_chart_stat_generator( + stat_generator_config, + self.reporter, + self._make_chart_urn, + looks, + ) - for usage_stat_generator in usage_stat_generators: + mcps: List[MetadataChangeProposalWrapper] = [] + for usage_stat_generator in [dashboard_usage_generator, chart_usage_generator]: for mcp in usage_stat_generator.generate_usage_stat_mcps(): mcps.append(mcp) @@ -1486,13 +1481,27 @@ def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]: ) for look in all_looks: if look.id in self.reachable_look_registry: - # This look is reachable from Dashboard + # This look is reachable from the Dashboard continue if look.query_id is None: logger.info(f"query_id is None for look {look.title}({look.id})") continue + if self.source_config.skip_personal_folders: + if look.folder is not None and ( + look.folder.is_personal or look.folder.is_personal_descendant + ): + self.reporter.info( + title="Dropped Look", + message="Dropped due to being a personal folder", + context=f"Look ID: {look.id}", + ) + + assert look.id, "Looker id is null" + self.reporter.report_charts_dropped(look.id) + continue + if look.id is not None: query: Optional[Query] = self.looker_api.get_look( look.id, fields=["query"] @@ -1510,11 +1519,12 @@ def extract_independent_looks(self) -> Iterable[MetadataWorkUnit]: LookerDashboardElement ] = self._get_looker_dashboard_element( DashboardElement( - id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes), we add the "looks_" prefix to look.id. 
+ id=f"looks_{look.id}", # to avoid conflict with non-standalone looks (element.id prefixes), + # we add the "looks_" prefix to look.id. title=look.title, subtitle_text=look.description, look_id=look.id, - dashboard_id=None, # As this is independent look + dashboard_id=None, # As this is an independent look look=LookWithQuery( query=query, folder=look.folder, user_id=look.user_id ), diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py index 93af0effa9f1f..6a623e1e97b5d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py @@ -10,12 +10,10 @@ import logging from abc import ABC, abstractmethod from dataclasses import dataclass -from enum import Enum -from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, cast +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, cast from looker_sdk.sdk.api40.models import Dashboard, LookWithQuery -import datahub.emitter.mce_builder as builder from datahub.emitter.mce_builder import Aspect, AspectAbstract from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.source.looker import looker_common @@ -97,7 +95,6 @@ class StatGeneratorConfig: looker_user_registry: LookerUserRegistry strip_user_ids_from_email: bool interval: str - platform_name: str max_threads: int = 1 @@ -166,7 +163,7 @@ class BaseStatGenerator(ABC): def __init__( self, config: StatGeneratorConfig, - looker_models: List[ModelForUsage], + looker_models: Sequence[ModelForUsage], report: LookerDashboardSourceReport, ): self.config = config @@ -411,14 +408,16 @@ class DashboardStatGenerator(BaseStatGenerator): def __init__( self, config: StatGeneratorConfig, - looker_dashboards: List[LookerDashboardForUsage], + looker_dashboards: Sequence[LookerDashboardForUsage], report: 
LookerDashboardSourceReport, + urn_builder: Callable[[str], str], ): super().__init__( config, - looker_models=cast(List[ModelForUsage], looker_dashboards), + looker_models=looker_dashboards, report=report, ) + self.urn_builder = urn_builder self.report = report self.report.report_dashboards_scanned_for_usage(len(looker_dashboards)) @@ -457,10 +456,7 @@ def _get_urn(self, model: ModelForUsage) -> str: assert isinstance(model, LookerDashboardForUsage) assert model.id is not None - return builder.make_dashboard_urn( - self.config.platform_name, - looker_common.get_urn_looker_dashboard_id(model.id), - ) + return self.urn_builder(looker_common.get_urn_looker_dashboard_id(model.id)) def to_entity_absolute_stat_aspect( self, looker_object: ModelForUsage @@ -528,14 +524,16 @@ class LookStatGenerator(BaseStatGenerator): def __init__( self, config: StatGeneratorConfig, - looker_looks: List[LookerChartForUsage], + looker_looks: Sequence[LookerChartForUsage], report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], ): super().__init__( config, - looker_models=cast(List[ModelForUsage], looker_looks), + looker_models=looker_looks, report=report, ) + self.urn_builder = urn_builder self.report = report report.report_charts_scanned_for_usage(len(looker_looks)) @@ -570,10 +568,7 @@ def _get_urn(self, model: ModelForUsage) -> str: assert isinstance(model, LookerChartForUsage) assert model.id is not None - return builder.make_chart_urn( - self.config.platform_name, - looker_common.get_urn_looker_element_id(str(model.id)), - ) + return self.urn_builder(looker_common.get_urn_looker_element_id(str(model.id))) def to_entity_absolute_stat_aspect( self, looker_object: ModelForUsage @@ -629,45 +624,34 @@ def append_user_stat( ) -class SupportedStatEntity(Enum): - DASHBOARD = "dashboard" - CHART = "chart" - - -# type_ is because of type is builtin identifier -def create_stat_entity_generator( - type_: SupportedStatEntity, config: StatGeneratorConfig -) -> 
Callable[[List[ModelForUsage], LookerDashboardSourceReport], BaseStatGenerator]: - # Wrapper function to defer creation of actual entities - # config is generally available at the startup, however entities may get created later during processing - def create_dashboard_stat_generator( - looker_dashboards: List[LookerDashboardForUsage], - report: LookerDashboardSourceReport, - ) -> BaseStatGenerator: - logger.debug( - "Number of dashboard received for stat processing = {}".format( - len(looker_dashboards) - ) - ) - return DashboardStatGenerator( - config=config, looker_dashboards=looker_dashboards, report=report +def create_dashboard_stat_generator( + config: StatGeneratorConfig, + report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], + looker_dashboards: Sequence[LookerDashboardForUsage], +) -> DashboardStatGenerator: + logger.debug( + "Number of dashboard received for stat processing = {}".format( + len(looker_dashboards) ) - - def create_chart_stat_generator( - looker_looks: List[LookerChartForUsage], report: LookerDashboardSourceReport - ) -> BaseStatGenerator: - logger.debug( - "Number of looks received for stat processing = {}".format( - len(looker_looks) - ) - ) - return LookStatGenerator( - config=config, looker_looks=looker_looks, report=report - ) - - stat_entities_generator = { - SupportedStatEntity.DASHBOARD: create_dashboard_stat_generator, - SupportedStatEntity.CHART: create_chart_stat_generator, - } - - return stat_entities_generator[type_] # type: ignore + ) + return DashboardStatGenerator( + config=config, + looker_dashboards=looker_dashboards, + report=report, + urn_builder=urn_builder, + ) + + +def create_chart_stat_generator( + config: StatGeneratorConfig, + report: LookerDashboardSourceReport, + urn_builder: Callable[[str], str], + looker_looks: Sequence[LookerChartForUsage], +) -> LookStatGenerator: + logger.debug( + "Number of looks received for stat processing = {}".format(len(looker_looks)) + ) + return LookStatGenerator( 
+ config=config, looker_looks=looker_looks, report=report, urn_builder=urn_builder + ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/mode.py b/metadata-ingestion/src/datahub/ingestion/source/mode.py index 73427d9084dd3..56b8ce00a4d1f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/mode.py +++ b/metadata-ingestion/src/datahub/ingestion/source/mode.py @@ -15,6 +15,8 @@ import yaml from liquid import Template, Undefined from pydantic import Field, validator +from requests.adapters import HTTPAdapter, Retry +from requests.exceptions import ConnectionError from requests.models import HTTPBasicAuth, HTTPError from sqllineage.runner import LineageRunner from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential @@ -127,6 +129,10 @@ class ModeAPIConfig(ConfigModel): max_attempts: int = Field( default=5, description="Maximum number of attempts to retry before failing" ) + timeout: int = Field( + default=40, + description="Timout setting, how long to wait for the Mode rest api to send data before giving up", + ) class ModeConfig(StatefulIngestionConfigBase, DatasetLineageProviderConfigBase): @@ -299,7 +305,15 @@ def __init__(self, ctx: PipelineContext, config: ModeConfig): self.report = ModeSourceReport() self.ctx = ctx - self.session = requests.session() + self.session = requests.Session() + # Handling retry and backoff + retries = 3 + backoff_factor = 10 + retry = Retry(total=retries, backoff_factor=backoff_factor) + adapter = HTTPAdapter(max_retries=retry) + self.session.mount("http://", adapter) + self.session.mount("https://", adapter) + self.session.auth = HTTPBasicAuth( self.config.token, self.config.password.get_secret_value(), @@ -1469,15 +1483,16 @@ def _get_request_json(self, url: str) -> Dict: multiplier=self.config.api_options.retry_backoff_multiplier, max=self.config.api_options.max_retry_interval, ), - retry=retry_if_exception_type(HTTPError429), + retry=retry_if_exception_type((HTTPError429, 
ConnectionError)), stop=stop_after_attempt(self.config.api_options.max_attempts), ) @r.wraps def get_request(): try: - response = self.session.get(url) - response.raise_for_status() + response = self.session.get( + url, timeout=self.config.api_options.timeout + ) return response.json() except HTTPError as http_error: error_response = http_error.response diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 4a1ec14ca1d4e..9f011790990ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -313,12 +313,22 @@ class TableauConfig( # Tableau project pattern project_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " + description="[deprecated] Use project_path_pattern instead. Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " "By default, all Projects nested inside a matching Project will be included in ingestion. " "You can both allow and deny projects based on their name using their name, or a Regex pattern. " "Deny patterns always take precedence over allow patterns. " "By default, all projects will be ingested.", ) + _deprecate_projects_pattern = pydantic_field_deprecated("project_pattern") + + project_path_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Filters Tableau projects by their full path. For instance, 'My Project/Nested Project' targets a specific nested project named 'Nested Project'." 
+ " This is also useful when you need to exclude all nested projects under a particular project." + " You can allow or deny projects by specifying their path or a regular expression pattern." + " Deny patterns always override allow patterns." + " By default, all projects are ingested.", + ) project_path_separator: str = Field( default="/", @@ -454,17 +464,23 @@ class TableauConfig( def projects_backward_compatibility(cls, values: Dict) -> Dict: projects = values.get("projects") project_pattern = values.get("project_pattern") - if project_pattern is None and projects: + project_path_pattern = values.get("project_path_pattern") + if project_pattern is None and project_path_pattern is None and projects: logger.warning( - "project_pattern is not set but projects is set. projects is deprecated, please use " - "project_pattern instead." + "projects is deprecated, please use " "project_path_pattern instead." ) logger.info("Initializing project_pattern from projects") values["project_pattern"] = AllowDenyPattern( allow=[f"^{prj}$" for prj in projects] ) - elif project_pattern != AllowDenyPattern.allow_all() and projects: - raise ValueError("projects is deprecated. Please use project_pattern only.") + elif (project_pattern or project_path_pattern) and projects: + raise ValueError( + "projects is deprecated. Please use project_path_pattern only." + ) + elif project_path_pattern and project_pattern: + raise ValueError( + "project_pattern is deprecated. Please use project_path_pattern only." 
+ ) return values @@ -850,12 +866,13 @@ def form_path(project_id: str) -> List[str]: def _is_allowed_project(self, project: TableauProject) -> bool: # Either project name or project path should exist in allow - is_allowed: bool = self.config.project_pattern.allowed( - project.name - ) or self.config.project_pattern.allowed(self._get_project_path(project)) + is_allowed: bool = ( + self.config.project_pattern.allowed(project.name) + or self.config.project_pattern.allowed(self._get_project_path(project)) + ) and self.config.project_path_pattern.allowed(self._get_project_path(project)) if is_allowed is False: logger.info( - f"project({project.name}) is not allowed as per project_pattern" + f"Project ({project.name}) is not allowed as per project_pattern or project_path_pattern" ) return is_allowed @@ -887,28 +904,29 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: logger.debug(f"Project {project.name} is added in project registry") projects_to_ingest[project.id] = project - # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. - # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. 
- self.tableau_project_registry = OrderedDict( - sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) - ) - if self.config.extract_project_hierarchy is False: logger.debug( "Skipping project hierarchy processing as configuration extract_project_hierarchy is " "disabled" ) - return + else: + logger.debug( + "Reevaluating projects as extract_project_hierarchy is enabled" + ) - logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") + for project in list_of_skip_projects: + if ( + project.parent_id in projects_to_ingest + and self._is_denied_project(project) is False + ): + logger.debug(f"Project {project.name} is added in project registry") + projects_to_ingest[project.id] = project - for project in list_of_skip_projects: - if ( - project.parent_id in self.tableau_project_registry - and self._is_denied_project(project) is False - ): - logger.debug(f"Project {project.name} is added in project registry") - self.tableau_project_registry[project.id] = project + # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. + # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. 
+ self.tableau_project_registry = OrderedDict( + sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) + ) def _init_datasource_registry(self) -> None: if self.server is None: diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 02eedb67f4cc2..a00a52ae54207 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -1,10 +1,19 @@ from dataclasses import dataclass, field from typing import Optional, Tuple -from datahub.ingestion.api.report import EntityFilterReport +from datahub.ingestion.api.report import EntityFilterReport, Report from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.utilities.lossy_collections import LossyDict, LossyList +from datahub.utilities.perf_timer import PerfTimer + + +@dataclass +class UnityCatalogUsagePerfReport(Report): + get_queries_timer: PerfTimer = field(default_factory=PerfTimer) + sql_parsing_timer: PerfTimer = field(default_factory=PerfTimer) + aggregator_add_event_timer: PerfTimer = field(default_factory=PerfTimer) + gen_operation_timer: PerfTimer = field(default_factory=PerfTimer) @dataclass @@ -27,6 +36,9 @@ class UnityCatalogReport(IngestionStageReport, ProfilingSqlReport): num_queries_missing_table: int = 0 # Can be due to pattern filter num_queries_duplicate_table: int = 0 num_queries_parsed_by_spark_plan: int = 0 + usage_perf_report: UnityCatalogUsagePerfReport = field( + default_factory=UnityCatalogUsagePerfReport + ) # Distinguish from Operations emitted for created / updated timestamps num_operational_stats_workunits_emitted: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index 
5eec2ca587ead..08482c9d2fa3b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -81,20 +81,25 @@ def _get_workunits_internal( table_map[f"{ref.schema}.{ref.table}"].append(ref) table_map[ref.qualified_table_name].append(ref) - for query in self._get_queries(): - self.report.num_queries += 1 - table_info = self._parse_query(query, table_map) - if table_info is not None: - if self.config.include_operational_stats: - yield from self._generate_operation_workunit(query, table_info) - for source_table in table_info.source_tables: - self.usage_aggregator.aggregate_event( - resource=source_table, - start_time=query.start_time, - query=query.query_text, - user=query.user_name, - fields=[], - ) + with self.report.usage_perf_report.get_queries_timer as current_timer: + for query in self._get_queries(): + self.report.num_queries += 1 + with current_timer.pause(): + table_info = self._parse_query(query, table_map) + if table_info is not None: + if self.config.include_operational_stats: + yield from self._generate_operation_workunit( + query, table_info + ) + for source_table in table_info.source_tables: + with self.report.usage_perf_report.aggregator_add_event_timer: + self.usage_aggregator.aggregate_event( + resource=source_table, + start_time=query.start_time, + query=query.query_text, + user=query.user_name, + fields=[], + ) if not self.report.num_queries: logger.warning("No queries found in the given time range.") @@ -117,29 +122,34 @@ def _get_workunits_internal( def _generate_operation_workunit( self, query: Query, table_info: QueryTableInfo ) -> Iterable[MetadataWorkUnit]: - if ( - not query.statement_type - or query.statement_type not in OPERATION_STATEMENT_TYPES - ): - return None + with self.report.usage_perf_report.gen_operation_timer: + if ( + not query.statement_type + or query.statement_type not in OPERATION_STATEMENT_TYPES + ): + return None - # Not sure about 
behavior when there are multiple target tables. This is a best attempt. - for target_table in table_info.target_tables: - operation_aspect = OperationClass( - timestampMillis=int(time.time() * 1000), - lastUpdatedTimestamp=int(query.end_time.timestamp() * 1000), - actor=( - self.user_urn_builder(query.user_name) if query.user_name else None - ), - operationType=OPERATION_STATEMENT_TYPES[query.statement_type], - affectedDatasets=[ - self.table_urn_builder(table) for table in table_info.source_tables - ], - ) - self.report.num_operational_stats_workunits_emitted += 1 - yield MetadataChangeProposalWrapper( - entityUrn=self.table_urn_builder(target_table), aspect=operation_aspect - ).as_workunit() + # Not sure about behavior when there are multiple target tables. This is a best attempt. + for target_table in table_info.target_tables: + operation_aspect = OperationClass( + timestampMillis=int(time.time() * 1000), + lastUpdatedTimestamp=int(query.end_time.timestamp() * 1000), + actor=( + self.user_urn_builder(query.user_name) + if query.user_name + else None + ), + operationType=OPERATION_STATEMENT_TYPES[query.statement_type], + affectedDatasets=[ + self.table_urn_builder(table) + for table in table_info.source_tables + ], + ) + self.report.num_operational_stats_workunits_emitted += 1 + yield MetadataChangeProposalWrapper( + entityUrn=self.table_urn_builder(target_table), + aspect=operation_aspect, + ).as_workunit() def _get_queries(self) -> Iterable[Query]: try: @@ -153,18 +163,23 @@ def _get_queries(self) -> Iterable[Query]: def _parse_query( self, query: Query, table_map: TableMap ) -> Optional[QueryTableInfo]: - table_info = self._parse_query_via_lineage_runner(query.query_text) - if table_info is None and query.statement_type == QueryStatementType.SELECT: - table_info = self._parse_query_via_spark_sql_plan(query.query_text) + with self.report.usage_perf_report.sql_parsing_timer: + table_info = self._parse_query_via_lineage_runner(query.query_text) + if table_info is 
None and query.statement_type == QueryStatementType.SELECT: + table_info = self._parse_query_via_spark_sql_plan(query.query_text) - if table_info is None: - self.report.num_queries_dropped_parse_failure += 1 - return None - else: - return QueryTableInfo( - source_tables=self._resolve_tables(table_info.source_tables, table_map), - target_tables=self._resolve_tables(table_info.target_tables, table_map), - ) + if table_info is None: + self.report.num_queries_dropped_parse_failure += 1 + return None + else: + return QueryTableInfo( + source_tables=self._resolve_tables( + table_info.source_tables, table_map + ), + target_tables=self._resolve_tables( + table_info.target_tables, table_map + ), + ) def _parse_query_via_lineage_runner(self, query: str) -> Optional[StringTableInfo]: try: diff --git a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py index d945e135f0012..f5908753affde 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sql_parsing_aggregator.py @@ -39,10 +39,12 @@ ColumnRef, DownstreamColumnRef, SqlParsingResult, + _sqlglot_lineage_cached, infer_output_schema, sqlglot_lineage, ) from datahub.sql_parsing.sqlglot_utils import ( + _parse_statement, generate_hash, get_query_fingerprint, try_format_query, @@ -222,6 +224,9 @@ class SqlAggregatorReport(Report): sql_parsing_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) sql_fingerprinting_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) sql_formatting_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) + sql_parsing_cache_stats: Optional[dict] = dataclasses.field(default=None) + parse_statement_cache_stats: Optional[dict] = dataclasses.field(default=None) + format_query_cache_stats: Optional[dict] = dataclasses.field(default=None) # Other lineage loading metrics. 
num_known_query_lineage: int = 0 @@ -239,6 +244,7 @@ class SqlAggregatorReport(Report): queries_with_non_authoritative_session: LossyList[QueryId] = dataclasses.field( default_factory=LossyList ) + make_schema_resolver_timer: PerfTimer = dataclasses.field(default_factory=PerfTimer) # Lineage-related. schema_resolver_count: Optional[int] = None @@ -272,6 +278,10 @@ def compute_stats(self) -> None: self.num_temp_sessions = len(self._aggregator._temp_lineage_map) self.num_inferred_temp_schemas = len(self._aggregator._inferred_temp_schemas) + self.sql_parsing_cache_stats = _sqlglot_lineage_cached.cache_info()._asdict() + self.parse_statement_cache_stats = _parse_statement.cache_info()._asdict() + self.format_query_cache_stats = try_format_query.cache_info()._asdict() + return super().compute_stats() @@ -679,11 +689,12 @@ def add_observed_query( # All queries with no session ID are assumed to be part of the same session. session_id = observed.session_id or _MISSING_SESSION_ID - # Load in the temp tables for this session. - schema_resolver: SchemaResolverInterface = ( - self._make_schema_resolver_for_session(session_id) - ) - session_has_temp_tables = schema_resolver.includes_temp_tables() + with self.report.make_schema_resolver_timer: + # Load in the temp tables for this session. + schema_resolver: SchemaResolverInterface = ( + self._make_schema_resolver_for_session(session_id) + ) + session_has_temp_tables = schema_resolver.includes_temp_tables() # Run the SQL parser. 
parsed = self._run_sql_parser( diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py index 27d99a14c0520..0806d0ec774fe 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_lineage.py @@ -1020,9 +1020,8 @@ def _sqlglot_lineage_inner( ) -@functools.lru_cache(maxsize=SQL_PARSE_RESULT_CACHE_SIZE) -def sqlglot_lineage( - sql: str, +def _sqlglot_lineage_nocache( + sql: sqlglot.exp.ExpOrStr, schema_resolver: SchemaResolverInterface, default_db: Optional[str] = None, default_schema: Optional[str] = None, @@ -1091,6 +1090,28 @@ def sqlglot_lineage( return SqlParsingResult.make_from_error(e) +_sqlglot_lineage_cached = functools.lru_cache(maxsize=SQL_PARSE_RESULT_CACHE_SIZE)( + _sqlglot_lineage_nocache +) + + +def sqlglot_lineage( + sql: sqlglot.exp.ExpOrStr, + schema_resolver: SchemaResolverInterface, + default_db: Optional[str] = None, + default_schema: Optional[str] = None, + default_dialect: Optional[str] = None, +) -> SqlParsingResult: + if schema_resolver.includes_temp_tables(): + return _sqlglot_lineage_nocache( + sql, schema_resolver, default_db, default_schema, default_dialect + ) + else: + return _sqlglot_lineage_cached( + sql, schema_resolver, default_db, default_schema, default_dialect + ) + + def create_lineage_sql_parsed_result( query: str, default_db: Optional[str], diff --git a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py index 56e4c806eb0c3..71245353101f6 100644 --- a/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py +++ b/metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py @@ -347,6 +347,9 @@ def detach_ctes( dialect = get_dialect(platform) statement = parse_statement(sql, dialect=dialect) + if not cte_mapping: + return statement + def replace_cte_refs(node: sqlglot.exp.Expression) -> 
sqlglot.exp.Expression: if ( isinstance(node, sqlglot.exp.Identifier) diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json index a7d46a2412b6c..631b28c64f14d 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_lineage_usage_golden.json @@ -498,5 +498,27 @@ "runId": "bigquery-2022_02_03-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,project-id-1.bigquery-dataset-1.view-1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1643871600000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "bigquery-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json index 537eeb5623149..fcf65130df975 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_mcp_golden.json @@ -98,7 +98,8 @@ "platform": "bigquery", "env": "PROD", "project_id": "project-id-1", - "dataset_id": "bigquery-dataset-1" + "dataset_id": "bigquery-dataset-1", + "location": "US" }, "externalUrl": "https://console.cloud.google.com/bigquery?project=project-id-1&ws=!1m4!1m3!3m2!1sproject-id-1!2sbigquery-dataset-1", "name": "bigquery-dataset-1", diff --git a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json index 
a2d21b84f19e8..1b79e8464c05f 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json +++ b/metadata-ingestion/tests/integration/bigquery_v2/bigquery_queries_mcps_golden.json @@ -12588,5 +12588,555 @@ "runId": "bigquery-queries-2024_08_19-07_00_00", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.derived_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322481569 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_external_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322505477 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_view_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322464098 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_sharded_table,PROD)", + "changeType": "PATCH", + 
"aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322500148 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_wildcard_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322502689 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.destination_table_of_select_query,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322510656 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_another_project,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322478955 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.derived_table_from_timetravelled_table,PROD)", + "changeType": "PATCH", + 
"aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322508214 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_base,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322460257 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view_and_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322472836 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.sharded_table1,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322491425 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.lineage_from_tmp_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ 
+ { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322457731 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_snapshot_on_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322471500 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.materialized_view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322476091 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_multiple_tables,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322484293 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.table_from_view,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + 
"value": { + "time": 1724322465459 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_ingestion_time_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322495660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db_2.table_from_other_db,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322467835 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.view_from_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322462741 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_nested_fields,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322498418 + } + 
} + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_3.base_table_2,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322477705 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging-2.smoke_test_db_4.table_with_integer_range_partition,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322497080 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.base_table,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322452660 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.partition_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322448864 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, 
+ "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,gcp-staging.smoke_test_db.usage_test,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1724322445357 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1724050800000, + "runId": "bigquery-queries-2024_08_19-07_00_00", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py index 1934e135457af..f9481d1d83d8b 100644 --- a/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py +++ b/metadata-ingestion/tests/integration/bigquery_v2/test_bigquery.py @@ -103,7 +103,9 @@ def test_bigquery_v2_ingest( mcp_output_path = "{}/{}".format(tmp_path, "bigquery_mcp_output.json") dataset_name = "bigquery-dataset-1" - get_datasets_for_project_id.return_value = [BigqueryDataset(name=dataset_name)] + get_datasets_for_project_id.return_value = [ + BigqueryDataset(name=dataset_name, location="US") + ] table_list_item = TableListItem( {"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}} @@ -321,7 +323,9 @@ def test_bigquery_queries_v2_ingest( mcp_output_path = "{}/{}".format(tmp_path, "bigquery_mcp_output.json") dataset_name = "bigquery-dataset-1" - get_datasets_for_project_id.return_value = [BigqueryDataset(name=dataset_name)] + get_datasets_for_project_id.return_value = [ + BigqueryDataset(name=dataset_name, location="US") + ] table_list_item = TableListItem( {"tableReference": {"projectId": "", "datasetId": "", "tableId": ""}} diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json index 
91095966eddd1..56e745d4f9aca 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_events_golden.json @@ -1886,6 +1886,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.accepted_values_orders_status__placed__shipped__completed__return_pending__returned.be6b5b5ec3", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -1939,8 +1941,8 @@ }, "assertionUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -1974,6 +1976,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.accepted_values_stg_orders_status__placed__shipped__completed__return_pending__returned.080fb20aad", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2027,8 +2031,8 @@ }, "assertionUrn": "urn:li:assertion:da743330013b7e3e3707ac6e526ab408", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2062,6 +2066,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.accepted_values_stg_payments_payment_method__credit_card__coupon__bank_transfer__gift_card.3c3820f278", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2115,8 +2121,8 @@ }, "assertionUrn": "urn:li:assertion:2887b9c826e0be6296a37833bdc380bd", "partitionSpec": { - "type": 
"FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2150,6 +2156,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.assert_total_payment_amount_is_positive", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2191,8 +2199,8 @@ }, "assertionUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2226,6 +2234,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.dbt_expectations_expect_column_values_to_be_between_customers_customer_id__2000000__0__customer_id_is_not_null__False.e67667298f", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2293,6 +2303,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.dbt_expectations_expect_column_values_to_be_in_set_customers_customer_id__customer_id_is_not_null__0__1__2.81450cfcd8", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2349,8 +2361,8 @@ }, "assertionUrn": "urn:li:assertion:bf7fd2b46d2c32ee9bb036acd1559782", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2384,6 +2396,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": 
"test.jaffle_shop.dbt_expectations_expect_column_values_to_not_be_in_set_orders_credit_card_amount__credit_card_amount_is_not_null__0.888b06036c", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2435,8 +2449,8 @@ }, "assertionUrn": "urn:li:assertion:1c217b7587a0cad47a07a09bfe154055", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2470,6 +2484,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_customers_customer_id.5c9bf9911d", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2516,8 +2532,8 @@ }, "assertionUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2551,6 +2567,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_amount.106140f9fd", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2597,8 +2615,8 @@ }, "assertionUrn": "urn:li:assertion:bbd78a070092f54313153abec49f6f31", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2632,6 +2650,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_bank_transfer_amount.7743500c49", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": 
"https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2678,8 +2698,8 @@ }, "assertionUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2713,6 +2733,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_coupon_amount.ab90c90625", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2759,8 +2781,8 @@ }, "assertionUrn": "urn:li:assertion:ca065a99637630468f688717590beeab", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2794,6 +2816,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_credit_card_amount.d3ca593b59", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2840,8 +2864,8 @@ }, "assertionUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2875,6 +2899,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_customer_id.c5f02694af", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -2921,8 +2947,8 @@ }, "assertionUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", "partitionSpec": { - "type": "FULL_TABLE", - 
"partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -2956,6 +2982,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_gift_card_amount.413a0d2d7a", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3002,8 +3030,8 @@ }, "assertionUrn": "urn:li:assertion:b301bb47cc4ebce4e78a194b3de11f25", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3037,6 +3065,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_orders_order_id.cf6c17daed", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3083,8 +3113,8 @@ }, "assertionUrn": "urn:li:assertion:2e9117138dcc9facda66f1efd55a8cd7", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3118,6 +3148,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_customers_customer_id.e2cfb1f9aa", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3164,8 +3196,8 @@ }, "assertionUrn": "urn:li:assertion:25ebf4faa9b1654ef54c46d975ca0a81", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3199,6 +3231,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_orders_order_id.81cfe2fe64", + 
"dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3245,8 +3279,8 @@ }, "assertionUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3280,6 +3314,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.not_null_stg_payments_payment_id.c19cc50075", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3326,8 +3362,8 @@ }, "assertionUrn": "urn:li:assertion:c1eebc71f36690e4523adca30314e927", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3339,7 +3375,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -3355,12 +3391,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3401,7 +3439,7 @@ }, { "entityType": "assertion", - "entityUrn": 
"urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { @@ -3414,10 +3452,10 @@ "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "assertionUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3429,12 +3467,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.relationships_orders_customer_id__customer_id__ref_customers_.c6ec7f58f2", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3475,7 +3531,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { @@ -3488,10 +3544,10 @@ "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "assertionUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", 
"partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3525,6 +3581,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_customers_customer_id.c5af1ff4b1", + "dbt_test_upstream_unique_id": "model.jaffle_shop.customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3577,8 +3635,8 @@ }, "assertionUrn": "urn:li:assertion:c51ca9c4b5a1f964bef748f0b8968e71", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3612,6 +3670,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_orders_order_id.fed79b3a6e", + "dbt_test_upstream_unique_id": "model.jaffle_shop.orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3664,8 +3724,8 @@ }, "assertionUrn": "urn:li:assertion:caa9b8060e214cecab88a92dc39c2e60", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3699,6 +3759,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_stg_customers_customer_id.c7614daada", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_customers", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3751,8 +3813,8 @@ }, "assertionUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3786,6 +3848,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": 
"test.jaffle_shop.unique_stg_orders_order_id.e3b841c71a", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_orders", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3838,8 +3902,8 @@ }, "assertionUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3873,6 +3937,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.jaffle_shop.unique_stg_payments_payment_id.3744510712", + "dbt_test_upstream_unique_id": "model.jaffle_shop.stg_payments", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v5.json", "manifest_version": "1.1.0", "manifest_adapter": "bigquery", @@ -3925,8 +3991,8 @@ }, "assertionUrn": "urn:li:assertion:fac27f352406b941125292413afa8096", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -3936,6 +4002,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:0331fa8a77015ca1a48d3a2fc90948fb", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-2022_02_03-07_00_00", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:11087a3d7ae178df22c42922ac8ef8ad", @@ -4034,7 +4116,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:44519aa345bf3ea896179f9f352ae946", + "entityUrn": "urn:li:assertion:3191c2851901165afc07c3bd7f5f590a", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4050,7 +4132,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", + "entityUrn": 
"urn:li:assertion:44519aa345bf3ea896179f9f352ae946", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4066,7 +4148,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", + "entityUrn": "urn:li:assertion:52d06197762e3608d94609e96f03a0a7", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4082,7 +4164,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", + "entityUrn": "urn:li:assertion:54bac90e6785bdefd8685ebf8814c429", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4098,7 +4180,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", + "entityUrn": "urn:li:assertion:591d8dc8939e0cf9bf0fd03264ad1a0e", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4114,7 +4196,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", + "entityUrn": "urn:li:assertion:7a305acc5fc049dc9bbd141b814461d0", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4130,7 +4212,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", + "entityUrn": "urn:li:assertion:815963e1332b46a203504ba46ebfab24", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4146,7 +4228,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", + "entityUrn": "urn:li:assertion:b03abcc447aac70bbebb22a8a9d7dbbe", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -4162,7 +4244,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:b210dbd31c2ee4efc0c24a9e4cf125ef", + "entityUrn": "urn:li:assertion:b052a324c05327985f3b579a19ad7579", "changeType": "UPSERT", "aspectName": "status", "aspect": { diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json 
b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json index d2c7165970681..42a416473ae24 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_prefer_sql_parser_lineage_golden.json @@ -4498,6 +4498,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.assert_source_actor_last_update_is_recent", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4574,6 +4576,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.is_email_monthly_billing_with_cust_email.57a935ce99", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4656,6 +4660,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_billing_month.19ce54289b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4737,6 +4743,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_email.d405c2cc13", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4796,7 +4804,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", 
"aspectName": "dataPlatformInstance", "aspect": { @@ -4812,12 +4820,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.customer_details", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4826,10 +4836,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -4858,20 +4868,20 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + 
"assertionUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -4886,12 +4896,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -4900,10 +4928,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -4932,20 +4960,20 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", 
"aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "partitionSpec": { "partition": "FULL_TABLE_SNAPSHOT", "type": "FULL_TABLE" @@ -4982,6 +5010,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_not_null_pagila_actor_actor_id.ad63829d3e", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5063,6 +5093,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_unique_pagila_actor_actor_id.76aff1935a", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5128,7 +5160,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5144,7 +5176,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5206,6 +5238,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + 
"changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-prefer-sql-parser-lineage", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", diff --git a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json index bec42f460e0b5..c281ea3eed0fa 100644 --- a/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json +++ b/metadata-ingestion/tests/integration/dbt/dbt_test_test_model_performance_golden.json @@ -638,8 +638,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -659,8 +659,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1097,8 +1097,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1118,8 +1118,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1420,8 +1420,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1441,8 +1441,8 @@ "json": { "timestampMillis": 1663355198242, 
"partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -1944,8 +1944,8 @@ "json": { "timestampMillis": 1663355198240, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "STARTED" } @@ -1965,8 +1965,8 @@ "json": { "timestampMillis": 1663355198242, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "status": "COMPLETE", "result": { @@ -5270,6 +5270,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.assert_source_actor_last_update_is_recent", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5311,8 +5313,8 @@ }, "assertionUrn": "urn:li:assertion:ba2c6ba830d407d539452f4cf46c92a6", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5346,6 +5348,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.is_email_monthly_billing_with_cust_email.57a935ce99", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5393,8 +5397,8 @@ }, "assertionUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5428,6 +5432,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": 
"test.sample_dbt.not_null_monthly_billing_with_cust_billing_month.19ce54289b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5474,8 +5480,8 @@ }, "assertionUrn": "urn:li:assertion:c456eccf6440c6e3388c584689a74d91", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5509,6 +5515,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.not_null_monthly_billing_with_cust_email.d405c2cc13", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5555,8 +5563,8 @@ }, "assertionUrn": "urn:li:assertion:f812b73477d81e6af283d918cb59e7bf", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5568,7 +5576,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "dataPlatformInstance", "aspect": { @@ -5584,12 +5592,14 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.customer_details", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": 
"1.7.3", "manifest_adapter": "postgres", @@ -5598,10 +5608,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -5630,23 +5640,23 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5658,12 +5668,30 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:dbt" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-model-performance", + 
"lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionInfo", "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.relationships_monthly_billing_with_cust_customer_id__customer_id__ref_customer_details_.653e08a90b", + "dbt_test_upstream_unique_id": "model.sample_dbt.monthly_billing_with_cust", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5672,10 +5700,10 @@ }, "type": "DATASET", "datasetAssertion": { - "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "scope": "DATASET_COLUMN", "fields": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD),customer_id)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD),customer_id)" ], "aggregation": "IDENTITY", "operator": "_NATIVE_", @@ -5704,23 +5732,23 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "changeType": "UPSERT", "aspectName": "assertionRunEvent", "aspect": { "json": { "timestampMillis": 1663355198239, "runId": "just-some-random-id", - "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.customer_details,PROD)", + "asserteeUrn": "urn:li:dataset:(urn:li:dataPlatform:dbt,pagila.public.an-aliased-view-for-monthly-billing,PROD)", "status": "COMPLETE", "result": { "type": "SUCCESS", "nativeResults": {} }, - "assertionUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "assertionUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", "partitionSpec": { - 
"type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5754,6 +5782,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_not_null_pagila_actor_actor_id.ad63829d3e", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5800,8 +5830,8 @@ }, "assertionUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5835,6 +5865,8 @@ "aspect": { "json": { "customProperties": { + "dbt_unique_id": "test.sample_dbt.source_unique_pagila_actor_actor_id.76aff1935a", + "dbt_test_upstream_unique_id": "source.sample_dbt.pagila.actor", "manifest_schema": "https://schemas.getdbt.com/dbt/manifest/v11.json", "manifest_version": "1.7.3", "manifest_adapter": "postgres", @@ -5887,8 +5919,8 @@ }, "assertionUrn": "urn:li:assertion:60ce4aad7ff6dbff7004da0f2258c9df", "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" } } }, @@ -5900,7 +5932,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:08c35a6481d3c37c93eaf9e424faa6d5", + "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5916,7 +5948,7 @@ }, { "entityType": "assertion", - "entityUrn": "urn:li:assertion:10f2a119dedcaab43afc47ff13d9cb5b", + "entityUrn": "urn:li:assertion:1f37cbfd95ea2a1d46b8e94828805eb1", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -5978,6 +6010,22 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "assertion", + "entityUrn": "urn:li:assertion:f3a0dbf71b6cbf7112ff44925f475ff5", + "changeType": 
"UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "dbt-model-performance", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "assertion", "entityUrn": "urn:li:assertion:f6a1fde3ab4919abcc04bdee93144958", diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index d213cffa78045..390d8d7698dd4 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -339,9 +339,13 @@ def test_dbt_tests(test_resources_dir, pytestconfig, tmp_path, mock_time, **kwar (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - (test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -442,9 +446,13 @@ def test_dbt_tests_only_assertions( (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - (test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], entities_enabled=DBTEntitiesEnabled( test_results=EmitDirective.ONLY ), @@ -465,7 +473,7 @@ def test_dbt_tests_only_assertions( ) > 20 ) - number_of_valid_assertions_in_test_results = 23 + number_of_valid_assertions_in_test_results = 24 assert ( mce_helpers.assert_entity_urn_like( entity_type="assertion", regex_pattern="urn:li:assertion:", file=output_file @@ -518,9 +526,13 @@ def test_dbt_only_test_definitions_and_results( (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - 
(test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], entities_enabled=DBTEntitiesEnabled( sources=EmitDirective.NO, seeds=EmitDirective.NO, @@ -542,7 +554,7 @@ def test_dbt_only_test_definitions_and_results( ) > 20 ) - number_of_assertions = 24 + number_of_assertions = 25 assert ( mce_helpers.assert_entity_urn_like( entity_type="assertion", regex_pattern="urn:li:assertion:", file=output_file diff --git a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json index f7ee62201a863..95671b4f5a09c 100644 --- a/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json +++ b/metadata-ingestion/tests/integration/delta_lake/delta_lake_minio_mces_golden.json @@ -400,5 +400,27 @@ "runId": "delta-lake-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-test-bucket/delta_tables/sales,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1672531200000, + "runId": "delta-lake-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json index 24344d6a26664..0e88106d79175 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_allow_table.json @@ -1838,5 +1838,93 @@ "runId": "allow_table.json", 
"lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "allow_table.json", + "lastRunId": 
"no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json index 717481b253429..d04cc78971190 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_inner_table.json @@ -1785,5 +1785,93 @@ "runId": "inner_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/sales,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655664815399 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_no_name,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831649788 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"inner_table.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/level1/my_table_inner,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831866541 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "inner_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json index f446c2deb6a84..0b1a8140cd649 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_relative_path.json @@ -368,5 +368,27 @@ "runId": "relative_path.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "relative_path.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json index 100f93fdaf5d3..cba70b2f54b18 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json +++ 
b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_single_table.json @@ -724,5 +724,27 @@ "runId": "single_table.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,tests/integration/delta_lake/test_data/delta_tables/my_table_basic,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1655831477768 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_table.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json index cb40e152f67cc..28e517cc8c319 100644 --- a/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json +++ b/metadata-ingestion/tests/integration/delta_lake/golden_files/local/golden_mces_tables_with_nested_datatypes.json @@ -1841,5 +1841,137 @@ "runId": "tables_with_nested_datatypes.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_1,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709535906725 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709110542636 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329078869 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_array_of_struct,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708329897384 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_nested_struct_2,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1709536366367 + } + } + ] + }, + 
"systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:delta-lake,my-platform.tests/integration/delta_lake/test_data/delta_tables_nested_datatype/table_with_string_and_nested_array_of_numbers,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1708330178404 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "tables_with_nested_datatypes.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json index 63ffdda8c5b6f..f178e97e78fa0 100644 --- a/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json +++ b/metadata-ingestion/tests/integration/looker/golden_test_independent_look_ingest.json @@ -569,6 +569,250 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "folder_id": "personal-folder-id" + }, + "name": "Personal", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + 
"lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(looker,dashboard_elements.looks_2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "upstream_fields": "order.placed_date" + }, + "title": "Personal Look", + "description": "I am not part of any Dashboard and in personal folder", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "chartUrl": "https://looker.company.com/looks/2", + "inputs": [ + { + "string": 
"urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Folders/Personal" + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_2)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Look" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_2)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/looks/2" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_2)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:359bb937bcf712f03c72318506aa32b9" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_2)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(looker,dashboard_elements.looks_2)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + }, + { + "id": "urn:li:container:359bb937bcf712f03c72318506aa32b9", + "urn": "urn:li:container:359bb937bcf712f03c72318506aa32b9" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", @@ -666,6 +910,103 @@ "pipelineName": "execution-1" } }, +{ + "entityType": "container", + "entityUrn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "model_name": "order_model" + }, + "name": "order_model", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + 
"changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "entityType": "container", "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", @@ -939,6 +1280,182 @@ "pipelineName": "execution-1" } }, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Explore/order_model" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.explore.label": "My Explore View", + "looker.explore.file": "test_source_file.lkml" + }, + "externalUrl": "https://looker.company.com/explore/order_model/order_explore", + "name": "My Explore View", + "description": "lorem ipsum", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "type": "VIEW" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "order_explore", + "platform": 
"urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Explore" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/explore/order_model/order_explore" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": 
"urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,order_model.explore.order_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + }, + { + "id": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60", + "urn": "urn:li:container:df4ee66abd19b668c88bfe4408f87e60" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { diff --git a/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json new file mode 100644 index 0000000000000..63ffdda8c5b6f --- /dev/null +++ b/metadata-ingestion/tests/integration/looker/golden_test_non_personal_independent_look.json @@ -0,0 +1,1232 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "folder_id": "shared-folder-id" + }, + "name": "Shared", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Folder" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:691314a7b63628684d62a14861d057a8", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(looker,dashboard_elements.2)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "upstream_fields": "calc,dim1" + }, + "title": "", + "description": "Some text", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "chartUrl": "https://looker.company.com/x/", + "inputs": [ + { + "string": 
"urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Folders/Shared/foo" + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Look" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + }, + { + "id": "urn:li:container:691314a7b63628684d62a14861d057a8", + "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + }, + { + "id": "urn:li:dashboard:(looker,dashboards.1)", + "urn": "urn:li:dashboard:(looker,dashboards.1)" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": { + "urn": "urn:li:dashboard:(looker,dashboards.1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.dashboard.DashboardInfo": { + "customProperties": {}, + "title": "foo", + "description": "lorem ipsum", + "charts": [ + "urn:li:chart:(looker,dashboard_elements.2)" + ], + "datasets": [], + "lastModified": { + "created": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 1586847600000, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "https://looker.company.com/dashboards/1" + } + }, 
+ { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Folders/Shared" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:691314a7b63628684d62a14861d057a8" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/dashboards/1" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + }, + { + "id": "urn:li:container:691314a7b63628684d62a14861d057a8", + "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.2)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + 
"nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(looker,dashboards.1)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [ + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:chart:(looker,dashboard_elements.2),calc)", + "schemaField": { + "fieldPath": "calc", + "nullable": false, + "description": "", + "label": "foobar", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "isPartOfKey": false + } + }, + { + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD),dim1)", + "schemaField": { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": { + "urn": "urn:li:chart:(looker,dashboard_elements.looks_1)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.chart.ChartInfo": { + "customProperties": { + "upstream_fields": "sales.profit" + }, + "title": "Outer Look", + "description": "I am not part of any Dashboard", + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "chartUrl": "https://looker.company.com/looks/1", + "inputs": [ + { + "string": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Folders/Shared" + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_1)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Look" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_1)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/looks/1" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": 
"urn:li:chart:(looker,dashboard_elements.looks_1)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:691314a7b63628684d62a14861d057a8" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_1)", + "changeType": "UPSERT", + "aspectName": "inputFields", + "aspect": { + "json": { + "fields": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "chart", + "entityUrn": "urn:li:chart:(looker,dashboard_elements.looks_1)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Folders" + }, + { + "id": "urn:li:container:691314a7b63628684d62a14861d057a8", + "urn": "urn:li:container:691314a7b63628684d62a14861d057a8" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "model_name": "data" + }, + "name": "data", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + 
"runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "looker", + "env": "PROD", + "model_name": "sales_model" + }, + "name": "sales_model", + "env": "PROD" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + 
"systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:looker" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "LookML Model" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Explore/data" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.explore.label": "My Explore View", + "looker.explore.file": "test_source_file.lkml" + }, + "externalUrl": "https://looker.company.com/explore/data/my_view", + "name": "My Explore View", + 
"description": "lorem ipsum", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "type": "VIEW" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "my_view", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Explore" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": 
"https://looker.company.com/embed/explore/data/my_view" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,data.explore.my_view,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + }, + { + "id": "urn:li:container:59a5aa45397364e6882e793f1bc77b42", + "urn": "urn:li:container:59a5aa45397364e6882e793f1bc77b42" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Explore/sales_model" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.explore.label": "My Explore View", + "looker.explore.file": "test_source_file.lkml" + }, + "externalUrl": "https://looker.company.com/explore/sales_model/sales_explore", + "name": "My Explore View", + "description": "lorem ipsum", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { 
+ "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)", + "type": "VIEW" + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "sales_explore", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "dim1", + "nullable": false, + "description": "dimension one description", + "label": "Dimensions One Label", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Explore" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "embed", + "aspect": { + "json": { + "renderUrl": "https://looker.company.com/embed/explore/sales_model/sales_explore" + } + }, + "systemMetadata": { + "lastObserved": 
1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,sales_model.explore.sales_explore,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Explore" + }, + { + "id": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5", + "urn": "urn:li:container:d38ab60586a6e39b4cf63f14946969c5" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Dimension", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Dimension", + "description": "A tag that is applied to all dimension fields." + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Temporal", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Temporal", + "description": "A tag that is applied to all time-based (temporal) fields such as timestamps or durations." 
+ } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": { + "urn": "urn:li:tag:Measure", + "aspects": [ + { + "com.linkedin.pegasus2avro.tag.TagProperties": { + "name": "Measure", + "description": "A tag that is applied to all measures (metrics). Measures are typically the columns that you aggregate on" + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Dimension", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Measure", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "looker-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "execution-1" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py index dfda2fedf877c..7238a49cb37d2 100644 --- a/metadata-ingestion/tests/integration/looker/test_looker.py +++ 
b/metadata-ingestion/tests/integration/looker/test_looker.py @@ -1,10 +1,12 @@ import json import time from datetime import datetime +from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Tuple, Union, cast from unittest import mock import pytest +from _pytest.config import Config from freezegun import freeze_time from looker_sdk.rtl import transport from looker_sdk.rtl.transport import TransportOptions @@ -321,21 +323,47 @@ def setup_mock_look(mocked_client): description="I am not part of any Dashboard", query_id="1", folder=FolderBase(name="Shared", id="shared-folder-id"), - ) + ), + Look( + id="2", + title="Personal Look", + description="I am not part of any Dashboard and in personal folder", + query_id="2", + folder=FolderBase( + name="Personal", + id="personal-folder-id", + is_personal=True, + is_personal_descendant=True, + ), + ), ] - mocked_client.look.return_value = LookWithQuery( - query=Query( - id="1", - view="sales_explore", - model="sales_model", - fields=[ - "sales.profit", - ], - dynamic_fields=None, - filters=None, - ) - ) + mocked_client.look.side_effect = [ + LookWithQuery( + query=Query( + id="1", + view="sales_explore", + model="sales_model", + fields=[ + "sales.profit", + ], + dynamic_fields=None, + filters=None, + ) + ), + LookWithQuery( + query=Query( + id="2", + view="order_explore", + model="order_model", + fields=[ + "order.placed_date", + ], + dynamic_fields=None, + filters=None, + ) + ), + ] def setup_mock_soft_deleted_look(mocked_client): @@ -875,7 +903,7 @@ def looker_source_config(sink_file_name): @freeze_time(FROZEN_TIME) def test_independent_look_ingestion_config(pytestconfig, tmp_path, mock_time): """ - if extract_independent_looks is enabled then stateful_ingestion.enabled should also be enabled + if extract_independent_looks is enabled, then stateful_ingestion.enabled should also be enabled """ new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/output") 
new_recipe["source"]["config"]["extract_independent_looks"] = True @@ -888,13 +916,18 @@ def test_independent_look_ingestion_config(pytestconfig, tmp_path, mock_time): Pipeline.create(new_recipe) -@freeze_time(FROZEN_TIME) -def test_independent_looks_ingest( - pytestconfig, tmp_path, mock_time, mock_datahub_graph -): +def ingest_independent_looks( + pytestconfig: Config, + tmp_path: Path, + mock_time: float, + mock_datahub_graph: mock.MagicMock, + skip_personal_folders: bool, + golden_file_name: str, +) -> None: mocked_client = mock.MagicMock() new_recipe = get_default_recipe(output_file_path=f"{tmp_path}/looker_mces.json") new_recipe["source"]["config"]["extract_independent_looks"] = True + new_recipe["source"]["config"]["skip_personal_folders"] = skip_personal_folders new_recipe["source"]["config"]["stateful_ingestion"] = { "enabled": True, "state_provider": { @@ -920,15 +953,42 @@ def test_independent_looks_ingest( pipeline = Pipeline.create(new_recipe) pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_independent_look_ingest.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "looker_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file_name}", ) +@freeze_time(FROZEN_TIME) +def test_independent_looks_ingest_with_personal_folder( + pytestconfig, tmp_path, mock_time, mock_datahub_graph +): + ingest_independent_looks( + pytestconfig=pytestconfig, + tmp_path=tmp_path, + mock_time=mock_time, + mock_datahub_graph=mock_datahub_graph, + skip_personal_folders=False, + golden_file_name="golden_test_independent_look_ingest.json", + ) + + +@freeze_time(FROZEN_TIME) +def test_independent_looks_ingest_without_personal_folder( + pytestconfig, tmp_path, mock_time, mock_datahub_graph +): + ingest_independent_looks( + pytestconfig=pytestconfig, + tmp_path=tmp_path, + mock_time=mock_time, + mock_datahub_graph=mock_datahub_graph, + skip_personal_folders=True, + 
golden_file_name="golden_test_non_personal_independent_look.json", + ) + + @freeze_time(FROZEN_TIME) def test_file_path_in_view_naming_pattern( pytestconfig, tmp_path, mock_time, mock_datahub_graph @@ -1003,9 +1063,10 @@ def test_independent_soft_deleted_looks( soft_deleted=True, ) - assert len(looks) == 2 + assert len(looks) == 3 assert looks[0].title == "Outer Look" - assert looks[1].title == "Soft Deleted" + assert looks[1].title == "Personal Look" + assert looks[2].title == "Soft Deleted" @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json index a6a685672bda0..ed00dc5734680 100644 --- a/metadata-ingestion/tests/integration/mode/mode_mces_golden.json +++ b/metadata-ingestion/tests/integration/mode/mode_mces_golden.json @@ -806,9 +806,9 @@ "json": { "fields": [ { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", "schemaField": { - "fieldPath": "amount", + "fieldPath": "payment_date", "nullable": false, "type": { "type": { @@ -828,9 +828,9 @@ } }, { - "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),payment_date)", + "schemaFieldUrn": "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD),amount)", "schemaField": { - "fieldPath": "payment_date", + "fieldPath": "amount", "nullable": false, "type": { "type": { @@ -1075,5 +1075,27 @@ "runId": "mode-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mode,10149707,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1639177973273 + } + } + ] + }, + "systemMetadata": { + 
"lastObserved": 1638860400000, + "runId": "mode-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mode/test_mode.py b/metadata-ingestion/tests/integration/mode/test_mode.py index 7ea6597460de2..ce7533d5611e4 100644 --- a/metadata-ingestion/tests/integration/mode/test_mode.py +++ b/metadata-ingestion/tests/integration/mode/test_mode.py @@ -45,8 +45,12 @@ def __init__(self, error_list, status_code): def json(self): return self.json_data - def get(self, url): + def mount(self, prefix, adaptor): + return self + + def get(self, url, timeout=40): self.url = url + self.timeout = timeout response_json_path = f"{test_resources_dir}/setup/{JSON_RESPONSE_MAP.get(url)}" with open(response_json_path) as file: data = json.loads(file.read()) @@ -74,7 +78,7 @@ def mocked_requests_failure(*args, **kwargs): @freeze_time(FROZEN_TIME) def test_mode_ingest_success(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_sucess, ): pipeline = Pipeline.create( @@ -111,7 +115,7 @@ def test_mode_ingest_success(pytestconfig, tmp_path): @freeze_time(FROZEN_TIME) def test_mode_ingest_failure(pytestconfig, tmp_path): with patch( - "datahub.ingestion.source.mode.requests.session", + "datahub.ingestion.source.mode.requests.Session", side_effect=mocked_requests_failure, ): global test_resources_dir diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json index 77db69d197c78..7c0ecd8a07ddf 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_file_without_extension.json @@ -146,7 +146,7 @@ "type": "FULL_TABLE" }, 
"operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445129021 + "lastUpdatedTimestamp": 1586808250000 } }, "systemMetadata": { @@ -1370,5 +1370,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808250000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json index b35fb24d43bf3..fb1ab3a869648 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } 
}, - "nativeDataType": "integer", + "nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": 
"dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json index 62ba688990e2b..5c330a1953549 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_exclude.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 
1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json index 346c2e9bcd83a..a14cfdfb6f635 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_filename.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,7 +60,7 @@ "isPartOfKey": false }, { - "fieldPath": "height", + "fieldPath": "weight", "nullable": false, "type": { "type": { @@ -72,26 +72,26 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "height", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "integer", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "color", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "integer", + 
"nativeDataType": "string", "recursive": false, "isPartOfKey": false } @@ -117,7 +117,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445089021 + "lastUpdatedTimestamp": 1586808220000 } }, "systemMetadata": { @@ -1307,7 +1307,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -1319,14 +1319,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -1343,7 +1343,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -1355,14 +1355,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -1388,7 +1388,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + "lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1798,5 +1798,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808220000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json index 6bff1bf1b1468..e695804f24f5d 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_no_partition_glob.json @@ -48,7 +48,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -60,14 +60,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -84,7 +84,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -96,14 +96,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445119021 + 
"lastUpdatedTimestamp": 1586808240000 } }, "systemMetadata": { @@ -1458,5 +1458,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808240000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json index 6668e4e6a26c0..4b78aae2a3642 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_basic.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": 
{ + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - 
"fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ 
-336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - 
"com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_basic.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_basic.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json index 
7f8bcfec6d314..201828842b84b 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_keyval.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": 
"effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + 
"nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - 
"isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false 
} ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_keyval.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_keyval.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json index 35efe52994837..52aead01fb2fc 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema.json @@ -48,31 +48,43 @@ }, "fields": [ { - "fieldPath": "effect_changes", + "fieldPath": "effect_changes.effect_entries.effect", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries", + "fieldPath": "effect_changes.effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": 
false }, { - "fieldPath": "effect_changes.effect_entries.effect", + "fieldPath": "effect_changes.effect_entries.language.url", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.StringType": {} + } + }, + "nativeDataType": "str", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "effect_changes.effect_entries.language.is_native", "nullable": false, "type": { "type": { @@ -96,19 +108,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.is_native", + "fieldPath": "effect_changes.effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.name", + "fieldPath": "effect_changes.version_group.name", "nullable": false, "type": { "type": { @@ -120,7 +132,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.effect_entries.language.url", + "fieldPath": "effect_changes.version_group.url", "nullable": false, "type": { "type": { @@ -144,19 +156,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.name", + "fieldPath": "effect_changes", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_changes.version_group.url", + "fieldPath": "effect_entries.effect", "nullable": false, "type": { "type": { @@ -168,19 +180,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries", + "fieldPath": "effect_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": 
"effect_entries.effect", + "fieldPath": "effect_entries.language.url", "nullable": false, "type": { "type": { @@ -204,7 +216,7 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.name", + "fieldPath": "effect_entries.short_effect", "nullable": false, "type": { "type": { @@ -216,19 +228,19 @@ "isPartOfKey": false }, { - "fieldPath": "effect_entries.language.url", + "fieldPath": "effect_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "effect_entries.short_effect", + "fieldPath": "flavor_text_entries.flavor_text", "nullable": false, "type": { "type": { @@ -240,19 +252,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries", + "fieldPath": "flavor_text_entries.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.flavor_text", + "fieldPath": "flavor_text_entries.language.url", "nullable": false, "type": { "type": { @@ -276,7 +288,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.name", + "fieldPath": "flavor_text_entries.version_group.name", "nullable": false, "type": { "type": { @@ -288,7 +300,7 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.language.url", + "fieldPath": "flavor_text_entries.version_group.url", "nullable": false, "type": { "type": { @@ -312,19 +324,19 @@ "isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.name", + "fieldPath": "flavor_text_entries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, 
"isPartOfKey": false }, { - "fieldPath": "flavor_text_entries.version_group.url", + "fieldPath": "generation.name", "nullable": false, "type": { "type": { @@ -336,19 +348,7 @@ "isPartOfKey": false }, { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", + "fieldPath": "generation.url", "nullable": false, "type": { "type": { @@ -360,14 +360,14 @@ "isPartOfKey": false }, { - "fieldPath": "generation.url", + "fieldPath": "generation", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -408,31 +408,19 @@ "isPartOfKey": false }, { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", + "fieldPath": "names.language.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "names.language.name", + "fieldPath": "names.language.url", "nullable": false, "type": { "type": { @@ -444,14 +432,14 @@ "isPartOfKey": false }, { - "fieldPath": "names.language.url", + "fieldPath": "names.language", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -468,7 +456,7 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon", + "fieldPath": "names", "nullable": false, "type": { "type": { @@ -492,19 +480,19 @@ 
"isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon", + "fieldPath": "pokemon.pokemon.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.RecordType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "dict", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.name", + "fieldPath": "pokemon.pokemon.url", "nullable": false, "type": { "type": { @@ -516,14 +504,14 @@ "isPartOfKey": false }, { - "fieldPath": "pokemon.pokemon.url", + "fieldPath": "pokemon.pokemon", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.RecordType": {} } }, - "nativeDataType": "str", + "nativeDataType": "dict", "recursive": false, "isPartOfKey": false }, @@ -538,6 +526,18 @@ "nativeDataType": "int", "recursive": false, "isPartOfKey": false + }, + { + "fieldPath": "pokemon", + "nullable": false, + "type": { + "type": { + "com.linkedin.schema.ArrayType": {} + } + }, + "nativeDataType": "list", + "recursive": false, + "isPartOfKey": false } ] } @@ -561,7 +561,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688445259021 + "lastUpdatedTimestamp": 1586808380000 } }, "systemMetadata": { @@ -1711,5 +1711,27 @@ "runId": "folder_partition_update_schema.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808380000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json deleted file mode 100644 index adb3686309e6c..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - 
"entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - 
"urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - 
"lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": 
"container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": 
"urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": 
"tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", 
- "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - 
"changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": 
"lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": 
"blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", 
- "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": 
{ - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - 
"type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - 
"nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon 
nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', 
url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival 
retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), 
version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', 
url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - 
"nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', 
url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": 
"urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_update_schema_with_partition_autodetect.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git 
a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json deleted file mode 100644 index 80f584788fdb2..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - 
"entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", 
- "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - 
} - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - 
"aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": 
"dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": 
"UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - 
"lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - 
"aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { 
- "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - 
"changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - 
"urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - 
"fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 - } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - 
}, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": 
"operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - 
"value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - "spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - 
"value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - "red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - 
"type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - 
"fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": 
"flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": 
false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": 
false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": 
"urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have 
a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', 
language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), 
version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i 
nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', 
url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), 
Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso 
dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - 
"uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), 
Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", 
- "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_all.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json deleted file mode 100644 index a384a8f1e501d..0000000000000 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ /dev/null @@ -1,2572 +0,0 @@ -[ -{ - 
"entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv/part3.csv", - "number_of_files": "3", - "size_in_bytes": "3539" - }, - "name": "folder_aaa.food_csv", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_csv", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - 
"type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "integer", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833420000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests" - }, - "name": "tests" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": 
"dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration" - }, - "name": "integration" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - 
"entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - 
"folder_abs_path": "tests/integration/s3" - }, - "name": "s3" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - 
"changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data" - }, - "name": "test_data" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - 
"aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system" - }, - "name": "local_system" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { 
- "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a" - }, - "name": "folder_a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": 
"urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "containerProperties", - 
"aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa" - }, - "name": "folder_aa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": 
"no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:98a716614da5246426edd48260406364", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "containerProperties", - "aspect": { - "json": { - "customProperties": { - "platform": "file", - "env": "UAT", - "folder_abs_path": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa" - }, - "name": "folder_aaa" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - 
"lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "Folder" - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:98a716614da5246426edd48260406364" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "container", - "entityUrn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - 
{ - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 4, - "columnCount": 4, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "apple", - "frequency": 1 - }, - { - "value": "cookie", - "frequency": 1 - }, - { - "value": "lasagna", - "frequency": 1 - }, - { - "value": "pasta", - "frequency": 1 
- } - ], - "sampleValues": [ - "apple", - "cookie", - "lasagna", - "pasta" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 4, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "23", - "frequency": 1 - }, - { - "value": "49", - "frequency": 1 - }, - { - "value": "50", - "frequency": 1 - }, - { - "value": "72", - "frequency": 1 - } - ], - "sampleValues": [ - "23", - "49", - "50", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 1, - "uniqueProportion": 0.25, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 4 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 3, - "uniqueProportion": 0.75, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "brown", - "frequency": 1 - }, - { - "value": "red", - "frequency": 2 - }, - { - "value": "yellow", - "frequency": 1 - } - ], - "sampleValues": [ - "brown", - "red", - "red", - "yellow" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": 
"urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet/part2.parquet", - "number_of_files": "2", - "size_in_bytes": "8412" - }, - "name": "folder_aaa.food_parquet", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.food_parquet", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": 
"urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "color", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "healthy", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "height", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "weight", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int64", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833440000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": 
"folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 70, - "columnCount": 5, - "fieldProfiles": [ - { - "fieldPath": "name", - "uniqueCount": 9, - "uniqueProportion": 0.13043478260869565, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "distinctValueFrequencies": [ - { - "value": "NullValue", - "frequency": 1 - }, - { - "value": "apple", - "frequency": 7 - }, - { - "value": "chicken", - "frequency": 7 - }, - { - "value": "cookie", - "frequency": 6 - }, - { - "value": "hamburger", - "frequency": 7 - }, - { - "value": "lasagna", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 7 - }, - { - "value": "pasta", - "frequency": 7 - }, - { - "value": "spinach", - "frequency": 7 - }, - { - "value": "sushi", - "frequency": 7 - }, - { - "value": "water", - "frequency": 7 - } - ], - "sampleValues": [ - "apple", - "apple", - "apple", - "chicken", - "cookie", - "cookie", - "cookie", - "lasagna", - "lasagna", - "orange", - "orange", - "pasta", - "pasta", - "pasta", - "pasta", - "spinach", - 
"spinach", - "spinach", - "water", - "water" - ] - }, - { - "fieldPath": "weight", - "uniqueCount": 10, - "uniqueProportion": 0.14285714285714285, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "10", - "frequency": 7 - }, - { - "value": "2", - "frequency": 7 - }, - { - "value": "23", - "frequency": 7 - }, - { - "value": "32", - "frequency": 7 - }, - { - "value": "36", - "frequency": 7 - }, - { - "value": "43", - "frequency": 7 - }, - { - "value": "49", - "frequency": 7 - }, - { - "value": "50", - "frequency": 7 - }, - { - "value": "53", - "frequency": 7 - }, - { - "value": "72", - "frequency": 7 - } - ], - "sampleValues": [ - "10", - "10", - "10", - "23", - "23", - "23", - "32", - "32", - "36", - "43", - "43", - "49", - "49", - "50", - "50", - "50", - "72", - "72", - "72", - "72" - ] - }, - { - "fieldPath": "height", - "uniqueCount": 4, - "uniqueProportion": 0.05714285714285714, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "4", - "frequency": 24 - }, - { - "value": "5", - "frequency": 15 - }, - { - "value": "6", - "frequency": 23 - }, - { - "value": "7", - "frequency": 8 - } - ], - "sampleValues": [ - "4", - "4", - "4", - "4", - "4", - "4", - "4", - "5", - "5", - "5", - "5", - "5", - "6", - "6", - "6", - "6", - "6", - "6", - "7", - "7" - ] - }, - { - "fieldPath": "color", - "uniqueCount": 7, - "uniqueProportion": 0.1, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "blue", - "frequency": 7 - }, - { - "value": "brown", - "frequency": 14 - }, - { - "value": "green", - "frequency": 7 - }, - { - "value": "orange", - "frequency": 14 - }, - { - "value": "red", - "frequency": 14 - }, - { - "value": "white", - "frequency": 7 - }, - { - "value": "yellow", - "frequency": 7 - } - ], - "sampleValues": [ - "blue", - "blue", - "brown", - "brown", - "brown", - "green", - "green", - "green", - "orange", - "orange", - "red", - "red", - "red", - "red", - 
"red", - "white", - "yellow", - "yellow", - "yellow", - "yellow" - ] - }, - { - "fieldPath": "healthy", - "uniqueCount": 2, - "uniqueProportion": 0.028985507246376812, - "nullCount": 1, - "nullProportion": 0.014285714285714285, - "sampleValues": [ - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "False", - "None", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True", - "True" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - 
"systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProperties", - "aspect": { - "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json/year=2022/month=jan/part3.json", - "number_of_files": "13", - "size_in_bytes": "188600" - }, - "name": "folder_aaa.pokemon_abilities_json", - "description": "", - "tags": [] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "schemaMetadata", - "aspect": { - "json": { - "schemaName": "folder_aaa.pokemon_abilities_json", - "platform": "urn:li:dataPlatform:file", - "version": 0, - "created": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "lastModified": { - "time": 0, - "actor": "urn:li:corpuser:unknown" - }, - "hash": "", - "platformSchema": { - "com.linkedin.schema.OtherSchema": { - "rawSchema": "" - } - }, - "fields": [ - { - "fieldPath": "effect_changes", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - 
"recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.is_native", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_changes.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "effect_entries.short_effect", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.flavor_text", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.name", - "nullable": false, - "type": { - "type": { - 
"com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "flavor_text_entries.version_group.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "generation.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "id", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "is_main_series", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - 
"nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.language.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "names.name", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.ArrayType": {} - } - }, - "nativeDataType": "list", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.is_hidden", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.BooleanType": {} - } - }, - "nativeDataType": "bool", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.RecordType": {} - } - }, - "nativeDataType": "dict", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.name", - "nullable": false, - "type": 
{ - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.pokemon.url", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.StringType": {} - } - }, - "nativeDataType": "str", - "recursive": false, - "isPartOfKey": false - }, - { - "fieldPath": "pokemon.slot", - "nullable": false, - "type": { - "type": { - "com.linkedin.schema.NumberType": {} - } - }, - "nativeDataType": "int", - "recursive": false, - "isPartOfKey": false - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "operation", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "operationType": "UPDATE", - "lastUpdatedTimestamp": 1586833590000 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "datasetProfile", - "aspect": { - "json": { - "timestampMillis": 1615443388097, - "partitionSpec": { - "partition": "FULL_TABLE_SNAPSHOT", - "type": "FULL_TABLE" - }, - "rowCount": 1, - "columnCount": 9, - "fieldProfiles": [ - { - "fieldPath": "effect_changes", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect_entries=[Row(effect='Hat im Kampf keinen Effekt.', language=Row(is_native='no', name='de', url='https://pokeapi.co/api/v2/language/6/')), Row(effect='Has no effect in battle.', language=Row(is_native='yes', name='en', url='https://pokeapi.co/api/v2/language/9/'))], version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/'))]" - ] - }, - { - "fieldPath": "effect_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(effect='Attacken die Schaden verursachen haben mit jedem Treffer eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen, wenn die Attacke dies nicht bereits als Nebeneffekt hat.\\n\\nDer Effekt stapelt nicht mit dem von getragenen Items.\\n\\nAu\u00dferhalb vom Kampf: Wenn ein Pok\u00e9mon mit dieser F\u00e4higkeit an erster Stelle im Team steht, tauchen wilde Pok\u00e9mon nur halb so oft auf.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), short_effect='Mit jedem Treffer besteht eine 10% Chance das Ziel zur\u00fcckschrecken zu lassen.'), Row(effect=\"This Pok\u00e9mon's damaging moves have a 10% chance to make the target flinch with each hit if they do not already cause flinching as a secondary effect.\\n\\nThis ability does not stack with a held item.\\n\\nOverworld: The wild encounter rate is halved while this Pok\u00e9mon is first in the party.\", 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), short_effect='Has a 10% chance of making target Pok\u00e9mon flinch with each hit.')]" - ] - }, - { - "fieldPath": "flavor_text_entries", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ruby-sapphire', url='https://pokeapi.co/api/v2/version-group/5/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='emerald', url='https://pokeapi.co/api/v2/version-group/6/')), Row(flavor_text='Helps repel wild POK\u00e9MON.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='firered-leafgreen', url='https://pokeapi.co/api/v2/version-group/7/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='diamond-pearl', url='https://pokeapi.co/api/v2/version-group/8/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='platinum', url='https://pokeapi.co/api/v2/version-group/9/')), Row(flavor_text='The stench helps keep\\nwild Pok\u00e9mon away.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='heartgold-soulsilver', url='https://pokeapi.co/api/v2/version-group/10/')), Row(flavor_text='La puanteur peut\\neffrayer l\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', 
url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-white', url='https://pokeapi.co/api/v2/version-group/11/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='black-2-white-2', url='https://pokeapi.co/api/v2/version-group/14/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='x-y', 
url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='x-y', url='https://pokeapi.co/api/v2/version-group/15/')), Row(flavor_text='\u304f\u3055\u304f\u3066\\u3000\u3042\u3044\u3066\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\uc545\ucde8 \ub54c\ubb38\uc5d0 \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='La puanteur peut effrayer\\nl\u2019adversaire.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='L\u00e4sst den Gegner durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='Es posible que el rival retroceda\\npor el mal olor.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='A volte il cattivo odore\\nfa tentennare i nemici.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='The stench may cause\\nthe target to flinch.', 
language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u81ed\u304f\u3066\\u3000\u76f8\u624b\u304c\\n\u3072\u308b\u3080\\u3000\u3053\u3068\u304c\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='omega-ruby-alpha-sapphire', url='https://pokeapi.co/api/v2/version-group/16/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', 
url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='By releasing stench when attacking, this Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='sun-moon', url='https://pokeapi.co/api/v2/version-group/17/')), Row(flavor_text='\u304f\u3055\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u306f\u306a\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u3053\u3046\u3052\u304d\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u3042\u3044\u3066\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', 
language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\uc545\ucde8\ub97c \ud48d\uaca8\uc11c\\n\uacf5\uaca9\ud588\uc744 \ub54c \uc0c1\ub300\uac00\\n\ud480\uc8fd\uc744 \ub54c\uac00 \uc788\ub2e4.', language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u767c\u51fa\u81ed\u6c23\uff0c\\n\u5728\u653b\u64ca\u7684\u6642\u5019\uff0c\\n\u6709\u6642\u6703\u4f7f\u5c0d\u624b\u754f\u7e2e\u3002', language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Le Pok\u00e9mon \u00e9met une odeur si naus\u00e9abonde\\nqu\u2019il peut effrayer sa cible.', language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='L\u00e4sst das Ziel beim Angriff eventuell durch Gestank\\nzur\u00fcckschrecken.', language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='Debido al mal olor que emana, al atacar al rival puede\\nhacerlo retroceder.', language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='A volte il cattivo odore emesso dal Pok\u00e9mon\\nfa tentennare i nemici quando attacca.', language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='By releasing stench when attacking, this 
Pok\u00e9mon\\nmay cause the target to flinch.', language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u81ed\u3044\\u3000\u306b\u304a\u3044\u3092\\u3000\u653e\u3064\u3053\u3068\u306b\u3088\u3063\u3066\\n\u653b\u6483\u3057\u305f\\u3000\u3068\u304d\u306b\\u3000\u76f8\u624b\u3092\\n\u3072\u308b\u307e\u305b\u308b\u3053\u3068\u304c\\u3000\u3042\u308b\u3002', language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/')), Row(flavor_text='\u901a\u8fc7\u91ca\u653e\u81ed\u81ed\u7684\u6c14\u5473\uff0c\\n\u5728\u653b\u51fb\u7684\u65f6\u5019\uff0c\\n\u6709\u65f6\u4f1a\u4f7f\u5bf9\u624b\u754f\u7f29\u3002', language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), version_group=Row(name='ultra-sun-ultra-moon', url='https://pokeapi.co/api/v2/version-group/18/'))]" - ] - }, - { - "fieldPath": "generation", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "Row(name='generation-iii', url='https://pokeapi.co/api/v2/generation/3/')" - ] - }, - { - "fieldPath": "id", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "1", - "frequency": 1 - } - ], - "sampleValues": [ - "1" - ] - }, - { - "fieldPath": "is_main_series", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "True" - ] - }, - { - "fieldPath": "name", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "distinctValueFrequencies": [ - { - "value": "stench", - "frequency": 1 - } - ], - "sampleValues": [ - "stench" - ] - }, - { - "fieldPath": "names", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - 
"nullProportion": 0.0, - "sampleValues": [ - "[Row(language=Row(name='ja-Hrkt', url='https://pokeapi.co/api/v2/language/1/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='ko', url='https://pokeapi.co/api/v2/language/3/'), name='\uc545\ucde8'), Row(language=Row(name='zh-Hant', url='https://pokeapi.co/api/v2/language/4/'), name='\u60e1\u81ed'), Row(language=Row(name='fr', url='https://pokeapi.co/api/v2/language/5/'), name='Puanteur'), Row(language=Row(name='de', url='https://pokeapi.co/api/v2/language/6/'), name='Duftnote'), Row(language=Row(name='es', url='https://pokeapi.co/api/v2/language/7/'), name='Hedor'), Row(language=Row(name='it', url='https://pokeapi.co/api/v2/language/8/'), name='Tanfo'), Row(language=Row(name='en', url='https://pokeapi.co/api/v2/language/9/'), name='Stench'), Row(language=Row(name='ja', url='https://pokeapi.co/api/v2/language/11/'), name='\u3042\u304f\u3057\u3085\u3046'), Row(language=Row(name='zh-Hans', url='https://pokeapi.co/api/v2/language/12/'), name='\u6076\u81ed')]" - ] - }, - { - "fieldPath": "pokemon", - "uniqueCount": 1, - "uniqueProportion": 1.0, - "nullCount": 0, - "nullProportion": 0.0, - "sampleValues": [ - "[Row(is_hidden=True, pokemon=Row(name='gloom', url='https://pokeapi.co/api/v2/pokemon/44/'), slot=3), Row(is_hidden=False, pokemon=Row(name='grimer', url='https://pokeapi.co/api/v2/pokemon/88/'), slot=1), Row(is_hidden=False, pokemon=Row(name='muk', url='https://pokeapi.co/api/v2/pokemon/89/'), slot=1), Row(is_hidden=False, pokemon=Row(name='stunky', url='https://pokeapi.co/api/v2/pokemon/434/'), slot=1), Row(is_hidden=False, pokemon=Row(name='skuntank', url='https://pokeapi.co/api/v2/pokemon/435/'), slot=1), Row(is_hidden=False, pokemon=Row(name='trubbish', url='https://pokeapi.co/api/v2/pokemon/568/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor', url='https://pokeapi.co/api/v2/pokemon/569/'), slot=1), Row(is_hidden=False, pokemon=Row(name='garbodor-gmax', 
url='https://pokeapi.co/api/v2/pokemon/10198/'), slot=1)]" - ] - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc", - "urn": "urn:li:container:583fb3ef3a2b226ea2630157568eb7dc" - }, - { - "id": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886", - "urn": "urn:li:container:bc816cf2df9acd90fcefa42dc425d886" - }, - { - "id": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4", - "urn": "urn:li:container:d20e88ff88a6de6e53e437d342e218f4" - }, - { - "id": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50", - "urn": "urn:li:container:f6ff9cd64806a7bb00e2e3bf37acca50" - }, - { - "id": "urn:li:container:93525defb812252106d3b0c08a55e39a", - "urn": "urn:li:container:93525defb812252106d3b0c08a55e39a" - }, - { - "id": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156", - "urn": "urn:li:container:48a8653fc4afb55b12cd8d0280e09156" - }, - { - "id": "urn:li:container:98a716614da5246426edd48260406364", - "urn": "urn:li:container:98a716614da5246426edd48260406364" - }, - { - "id": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec", - "urn": "urn:li:container:a0904d16a673fde8cbc8d0f2e167ecec" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_csv,UAT)", - "changeType": "UPSERT", - 
"aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/pokemon_abilities_json,UAT)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "json": { - "removed": false - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", - "lastRunId": "no-run-id-provided" - } -} -] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json index 4f98d68f8ae62..6ae2ec160035e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_files.json @@ -458,7 +458,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444889021 + "lastUpdatedTimestamp": 1586808010000 } }, "systemMetadata": { @@ -2896,38 +2896,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": 
"[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -2965,7 +2965,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -3541,7 +3541,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -3974,17 +3974,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", - "number_of_files": "1", - "size_in_bytes": "4646" - }, - "name": "countries_json.json", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": 
"urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ -3997,11 +3991,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json", + "number_of_files": "1", + "size_in_bytes": "4646" + }, + "name": "countries_json.json", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4036,14 +4036,14 @@ }, "fields": [ { - "fieldPath": "countries", + "fieldPath": "countries.name", "nullable": false, "type": { "type": { - "com.linkedin.schema.ArrayType": {} + "com.linkedin.schema.StringType": {} } }, - "nativeDataType": "list", + "nativeDataType": "str", "recursive": false, "isPartOfKey": false }, @@ -4060,14 +4060,14 @@ "isPartOfKey": false }, { - "fieldPath": "countries.name", + "fieldPath": "countries", "nullable": false, "type": { "type": { - "com.linkedin.schema.StringType": {} + "com.linkedin.schema.ArrayType": {} } }, - "nativeDataType": "str", + "nativeDataType": "list", "recursive": false, "isPartOfKey": false } @@ -4093,7 +4093,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444919021 + "lastUpdatedTimestamp": 1586808040000 } }, "systemMetadata": { @@ -4205,6 +4205,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + 
"changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", @@ -4254,7 +4271,7 @@ }, "fields": [ { - "fieldPath": "color", + "fieldPath": "name", "nullable": false, "type": { "type": { @@ -4266,14 +4283,14 @@ "isPartOfKey": false }, { - "fieldPath": "healthy", + "fieldPath": "weight", "nullable": false, "type": { "type": { - "com.linkedin.schema.BooleanType": {} + "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "bool", + "nativeDataType": "int64", "recursive": false, "isPartOfKey": false }, @@ -4290,7 +4307,7 @@ "isPartOfKey": false }, { - "fieldPath": "name", + "fieldPath": "color", "nullable": false, "type": { "type": { @@ -4302,14 +4319,14 @@ "isPartOfKey": false }, { - "fieldPath": "weight", + "fieldPath": "healthy", "nullable": false, "type": { "type": { - "com.linkedin.schema.NumberType": {} + "com.linkedin.schema.BooleanType": {} } }, - "nativeDataType": "int64", + "nativeDataType": "bool", "recursive": false, "isPartOfKey": false } @@ -4335,24 +4352,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444929021 - } - }, - "systemMetadata": { - "lastObserved": 1615443388097, - "runId": "multiple_files.json", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", - "changeType": "UPSERT", - "aspectName": 
"dataPlatformInstance", - "aspect": { - "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "lastUpdatedTimestamp": 1586808050000 } }, "systemMetadata": { @@ -4739,17 +4739,11 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "datasetProperties", + "aspectName": "dataPlatformInstance", "aspect": { "json": { - "customProperties": { - "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", - "number_of_files": "1", - "size_in_bytes": "172" - }, - "name": "small.csv", - "description": "", - "tags": [] + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" } }, "systemMetadata": { @@ -4762,11 +4756,17 @@ "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "datasetProperties", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "customProperties": { + "schema_inferred_from": "tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv", + "number_of_files": "1", + "size_in_bytes": "172" + }, + "name": "small.csv", + "description": "", + "tags": [] } }, "systemMetadata": { @@ -4882,7 +4882,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444939021 + "lastUpdatedTimestamp": 1586808060000 } }, "systemMetadata": { @@ -5124,6 +5124,23 @@ "lastRunId": "no-run-id-provided" } }, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:file", + "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + } + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", @@ -5566,7 +5583,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444949021 + "lastUpdatedTimestamp": 1586808070000 } }, "systemMetadata": { @@ -7807,13 +7824,12 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", "changeType": "UPSERT", - "aspectName": "dataPlatformInstance", + "aspectName": "status", "aspect": { "json": { - "platform": "urn:li:dataPlatform:file", - "instance": "urn:li:dataPlatformInstance:(urn:li:dataPlatform:file,test-platform-instance)" + "removed": false } }, "systemMetadata": { @@ -7824,7 +7840,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7840,7 +7856,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7856,7 +7872,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7872,7 +7888,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7888,7 +7904,7 @@ }, { "entityType": "dataset", - "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7904,7 +7920,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", "changeType": "UPSERT", "aspectName": "status", "aspect": { @@ -7918,15 +7934,153 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808010000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": 
"multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808040000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808050000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": 
"/lastModified", + "value": { + "time": 1586808060000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "dataset", "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,test-platform-instance.tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", - "changeType": "UPSERT", - "aspectName": "status", + "changeType": "PATCH", + "aspectName": "datasetProperties", "aspect": { - "json": { - "removed": false - } + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808070000 + } + } + ] }, "systemMetadata": { "lastObserved": 1615443388097, diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json index 491cbdf8b9704..9bb8412b64f91 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_spec_for_files.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": 
"[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json 
b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json index da7c76876d415..fea1929b98ab5 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_multiple_specs_of_different_buckets.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1619,7 +1619,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444909021 + "lastUpdatedTimestamp": 1586808030000 } }, "systemMetadata": { @@ -2075,5 +2075,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808030000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json index 76e562142e39e..a31a721fbbadd 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/local/golden_mces_single_file.json @@ -60,38 +60,38 @@ "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "FourthChord", + "nativeDataType": "SecondChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].SecondChord", + "fieldPath": 
"[version=2.0].[type=Record].[type=long].ThirdChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "SecondChord", + "nativeDataType": "ThirdChord", "recursive": false, "isPartOfKey": false }, { - "fieldPath": "[version=2.0].[type=Record].[type=long].ThirdChord", + "fieldPath": "[version=2.0].[type=Record].[type=long].FourthChord", "nullable": false, "type": { "type": { "com.linkedin.schema.NumberType": {} } }, - "nativeDataType": "ThirdChord", + "nativeDataType": "FourthChord", "recursive": false, "isPartOfKey": false }, @@ -129,7 +129,7 @@ "type": "FULL_TABLE" }, "operationType": "UPDATE", - "lastUpdatedTimestamp": 1688444899021 + "lastUpdatedTimestamp": 1586808020000 } }, "systemMetadata": { @@ -1504,5 +1504,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:file,tests/integration/s3/test_data/local_system/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586808020000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json index abc6eb1b471b2..63888d6bc4351 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_inference_without_extension.json @@ -174,7 +174,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": 
"file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -269,7 +269,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -384,7 +384,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -503,7 +503,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -626,7 +626,7 @@ }, "systemMetadata": { "lastObserved": 1615443388097, - "runId": "file_inference_without_extension.json", + "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } }, @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_inference_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json index 1c022fabf9158..8087ea591beef 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_file_without_extension.json @@ -806,5 +806,27 @@ "runId": "file_without_extension.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/no_extension/small,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847850000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "file_without_extension.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json index 41484bec81935..64c1505414ff8 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": 
"add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json index 0b28381fce8ff..f86c652462fd4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_exclude.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_exclude.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_exclude.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json index c4cfed8bfc7ac..2575db41ca8b7 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_filename.json @@ -817,5 +817,49 @@ "runId": "folder_no_partition_filename.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_filename.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json index ae81f60ac8dcc..272beb57e85e1 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_no_partition_glob.json @@ -627,5 +627,27 @@ "runId": "folder_no_partition_glob.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_no_partition_glob.json", + "lastRunId": "no-run-id-provided" + } } ] \ No 
newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json index 684af901e6832..21623e2216565 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_update_schema_with_partition_autodetect.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_update_schema_with_partition_autodetect.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_update_schema_with_partition_autodetect.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json index 4d23cadbbc4d3..154bce421e18a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_all.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_all.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_all.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json index 6017a27a88895..f483f806e6193 100644 --- 
a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_folder_partition_with_partition_autodetect_traverse_min_max.json @@ -1483,5 +1483,49 @@ "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_csv,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847820000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet,UAT)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847840000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "folder_partition_with_partition_autodetect_traverse_min_max.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json index 90a361219c1bf..38ce5188e0a8e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_files.json @@ -2625,5 +2625,159 @@ "runId": "multiple_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + 
"entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/NPS.7.1.package_data_NPS.6.1_ARCN_Lakes_ChemistryData_v1_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847610000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/wa_fn_usec_hr_employee_attrition_csv.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847670000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 
1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/small.csv,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847660000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/countries_json.json,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847640000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,test-platform-instance.my-test-bucket/folder_a/folder_aa/folder_aaa/food_parquet.parquet,DEV)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847650000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json index c67977ef7fa1b..7f657cb69180a 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json +++ 
b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_spec_for_files.json @@ -911,5 +911,49 @@ "runId": "multiple_spec_for_files.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_spec_for_files.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json index d96bdce912082..6e2e966f1f7b4 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_multiple_specs_of_different_buckets.json @@ -1338,5 +1338,49 @@ "runId": "multiple_specs_of_different_buckets.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": 
"urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket-2/folder_a/folder_aa/folder_aaa/chord_progressions_csv.csv,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847630000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "multiple_specs_of_different_buckets.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json index 7703d137ddd29..be3d2efed088e 100644 --- a/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json +++ b/metadata-ingestion/tests/integration/s3/golden-files/s3/golden_mces_single_file.json @@ -684,5 +684,27 @@ "runId": "single_file.json", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:s3,my-test-bucket/folder_a/folder_aa/folder_aaa/chord_progressions_avro.avro,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1586847620000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1615443388097, + "runId": "single_file.json", + "lastRunId": "no-run-id-provided" + } } ] \ No newline 
at end of file diff --git a/metadata-ingestion/tests/integration/s3/test_s3.py b/metadata-ingestion/tests/integration/s3/test_s3.py index 54156610c6872..0e73cdca006bd 100644 --- a/metadata-ingestion/tests/integration/s3/test_s3.py +++ b/metadata-ingestion/tests/integration/s3/test_s3.py @@ -242,6 +242,7 @@ def test_data_lake_local_ingest( golden_path=f"{test_resources_dir}/golden-files/local/golden_mces_{source_file}", ignore_paths=[ r"root\[\d+\]\['aspect'\]\['json'\]\['lastUpdatedTimestamp'\]", + r"root\[\d+\]\['aspect'\]\['json'\]\[\d+\]\['value'\]\['time'\]", r"root\[\d+\]\['proposedSnapshot'\].+\['aspects'\].+\['created'\]\['time'\]", # root[41]['aspect']['json']['fieldProfiles'][0]['sampleValues'][0] r"root\[\d+\]\['aspect'\]\['json'\]\['fieldProfiles'\]\[\d+\]\['sampleValues'\]", diff --git a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json index 6a3ce983950b0..82c760458ca14 100644 --- a/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json +++ b/metadata-ingestion/tests/integration/salesforce/salesforce_mces_golden.json @@ -1514,8 +1514,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "CREATE", @@ -1537,8 +1537,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "actor": "urn:li:corpuser:user@mydomain.com", "operationType": "ALTER", @@ -2023,8 +2023,8 @@ "json": { "timestampMillis": 1652353200000, "partitionSpec": { - "type": "FULL_TABLE", - "partition": "FULL_TABLE_SNAPSHOT" + "partition": "FULL_TABLE_SNAPSHOT", + "type": "FULL_TABLE" }, "rowCount": 3, "columnCount": 15 @@ -2099,5 +2099,27 
@@ "runId": "salesforce-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:salesforce,Property__c,PROD)", + "changeType": "PATCH", + "aspectName": "datasetProperties", + "aspect": { + "json": [ + { + "op": "add", + "path": "/lastModified", + "value": { + "time": 1652784043000 + } + } + ] + }, + "systemMetadata": { + "lastObserved": 1652353200000, + "runId": "salesforce-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json new file mode 100644 index 0000000000000..8798ca291422c --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json @@ -0,0 +1,352 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "190a6a5c-63ed-4de1-8045-faeae5df5b01" + }, + "name": "default" + } + }, + "systemMetadata": { + "lastObserved": 1727349368101, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368102, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + 
"platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368103, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368104, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368105, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "79d02655-88e5-45a6-9f9b-eeaf5fe54903" + }, + "name": "DenyProject" + } + }, + "systemMetadata": { + "lastObserved": 1727349368108, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + 
"aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368110, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + }, + "systemMetadata": { + "lastObserved": 1727349368111, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368112, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "workbook_id": "ee012e36-d916-4c21-94ab-f0d66736af4e" + }, + "externalUrl": "https://do-not-connect/#/site/acryl/workbooks/17904", + "name": 
"Deny Pattern WorkBook", + "description": "" + } + }, + "systemMetadata": { + "lastObserved": 1727349368113, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368114, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368115, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368116, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1727349368117, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "urn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json new file mode 100644 index 0000000000000..96dcfeb246c91 --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json @@ -0,0 +1,184 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "c30aafe5-44f4-4f28-80d3-d181010a263c" + }, + "name": "Project 2" + } + }, + "systemMetadata": { + "lastObserved": 1727349368232, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + 
"entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368234, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368235, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "910733aa-2e95-4ac3-a2e8-71570751099d" + }, + "name": "Samples" + } + }, + "systemMetadata": { + "lastObserved": 1727349368238, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": 
"tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368240, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368241, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 4be39f02757ba..5a5552a78c56f 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -545,7 +545,72 @@ def 
test_value_error_projects_and_project_pattern( pipeline_config=new_config, ) except Exception as e: - assert "projects is deprecated. Please use project_pattern only" in str(e) + assert "projects is deprecated. Please use project_path_pattern only" in str(e) + + +def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph): + # Ingestion should raise ValueError + output_file_name: str = "tableau_project_pattern_deprecation_mces.json" + golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_pattern"] = {"allow": ["^Samples$"]} + new_config["project_path_pattern"] = {"allow": ["^Samples$"]} + + try: + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + except Exception as e: + assert ( + "project_pattern is deprecated. Please use project_path_pattern only" + in str(e) + ) + + +def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_allow_mces.json" + golden_file_name: str = "tableau_project_path_pattern_allow_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"allow": ["default/DenyProject"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + + +def test_project_path_pattern_deny(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_deny_mces.json" + golden_file_name: str = "tableau_project_path_pattern_deny_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"deny": ["^default.*"]} + + tableau_ingest_common( + 
pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) @freeze_time(FROZEN_TIME) diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py index 6592ffe5198c1..ddd19804ba184 100644 --- a/metadata-ingestion/tests/performance/databricks/test_unity.py +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -40,7 +40,10 @@ def run_test(): print("Data generated") config = UnityCatalogSourceConfig( - token="", workspace_url="http://localhost:1234", include_usage_statistics=False + token="", + workspace_url="http://localhost:1234", + include_usage_statistics=True, + include_hive_metastore=False, ) ctx = PipelineContext(run_id="test") with patch( @@ -61,7 +64,7 @@ def run_test(): print( f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" ) - print(source.report.aspects) + print(source.report.as_string()) if __name__ == "__main__": diff --git a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py index cb3a1c165acdd..307a7ba71ef83 100644 --- a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py +++ b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py @@ -1,7 +1,7 @@ import uuid from collections import defaultdict from datetime import datetime, timezone -from typing import Dict, Iterable, List +from typing import Dict, Iterable, List, Optional from databricks.sdk.service.catalog import ColumnTypeName from databricks.sdk.service.sql import QueryStatementType @@ -57,13 +57,15 @@ def assigned_metastore(self) -> Metastore: region=None, ) - def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: for container in self.seed_metadata.containers[1]: - if not container.parent or 
metastore.name != container.parent.name: + if not container.parent or ( + metastore and metastore.name != container.parent.name + ): continue yield Catalog( - id=f"{metastore.id}.{container.name}", + id=f"{metastore.id}.{container.name}" if metastore else container.name, name=container.name, metastore=metastore, comment=None, @@ -153,7 +155,7 @@ def query_history( executed_as_user_name=None, ) - def table_lineage(self, table: Table) -> None: + def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: pass def get_column_lineage(self, table: Table) -> None: diff --git a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py index d995404ad69a5..97f65f1bd6a5b 100644 --- a/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py +++ b/metadata-ingestion/tests/unit/api/source_helpers/test_source_helpers.py @@ -14,6 +14,9 @@ make_dataset_urn, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.auto_work_units.auto_dataset_properties_aspect import ( + auto_patch_last_modified, +) from datahub.ingestion.api.source_helpers import ( _prepend_platform_instance, auto_browse_path_v2, @@ -21,8 +24,15 @@ auto_lowercase_urns, auto_status_aspect, auto_workunit, + create_dataset_props_patch_builder, ) from datahub.ingestion.api.workunit import MetadataWorkUnit +from datahub.metadata.schema_classes import ( + DatasetPropertiesClass, + OperationTypeClass, + TimeStampClass, +) +from datahub.specific.dataset import DatasetPatchBuilder _base_metadata: List[ Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass] @@ -146,7 +156,6 @@ def _make_browse_path_entries(path: List[str]) -> List[models.BrowsePathEntryCla def prepend_platform_instance( path: List[models.BrowsePathEntryClass], ) -> List[models.BrowsePathEntryClass]: - platform = "platform" instance = "instance" return _prepend_platform_instance(path, 
platform, instance) @@ -656,3 +665,150 @@ def test_auto_empty_dataset_usage_statistics_invalid_timestamp( changeType=models.ChangeTypeClass.CREATE, ).as_workunit(), ] + + +def get_sample_mcps(mcps_to_append: List = []) -> List[MetadataChangeProposalWrapper]: + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=10, + lastUpdatedTimestamp=12, + operationType=OperationTypeClass.CREATE, + ), + ), + MetadataChangeProposalWrapper( + entityUrn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + aspect=models.OperationClass( + timestampMillis=11, + lastUpdatedTimestamp=20, + operationType=OperationTypeClass.CREATE, + ), + ), + ] + mcps.extend(mcps_to_append) + return mcps + + +def to_patch_work_units(patch_builder: DatasetPatchBuilder) -> List[MetadataWorkUnit]: + return [ + MetadataWorkUnit( + id=MetadataWorkUnit.generate_workunit_id(patch_mcp), mcp_raw=patch_mcp + ) + for patch_mcp in patch_builder.build() + ] + + +def get_auto_generated_wu() -> List[MetadataWorkUnit]: + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + ).set_last_modified(TimeStampClass(time=20)) + + auto_generated_work_units = to_patch_work_units(dataset_patch_builder) + + return auto_generated_work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_no_change(): + mcps = [ + MetadataChangeProposalWrapper( + entityUrn="urn:li:container:008e111aa1d250dd52e0fd5d4b307b1a", + aspect=models.StatusClass(removed=False), + ) + ] + + initial_wu = list(auto_workunit(mcps)) + + expected = initial_wu + + assert ( + list(auto_patch_last_modified(initial_wu)) == expected + ) # There should be no change + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_max_last_updated_timestamp(): + mcps = get_sample_mcps() + + expected = list(auto_workunit(mcps)) + + auto_generated_work_units 
= get_auto_generated_wu() + + expected.extend(auto_generated_work_units) + + # work unit should contain a patch of datasetProperties with lastModified set to the max of operation.lastUpdatedTimestamp + # i.e., 20 + assert list(auto_patch_last_modified(auto_workunit(mcps))) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_multi_patch(): + mcps = get_sample_mcps() + + dataset_patch_builder = DatasetPatchBuilder( + urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)" + ) + + dataset_patch_builder.set_display_name("foo") + dataset_patch_builder.set_description("it is fake") + + patch_work_units = to_patch_work_units(dataset_patch_builder) + + work_units = [*list(auto_workunit(mcps)), *patch_work_units] + + auto_generated_work_units = get_auto_generated_wu() + + expected = [*work_units, *auto_generated_work_units] + + # In this case, the final work units include two patch units: one originating from the source and + # the other from auto_patch_last_modified. + assert list(auto_patch_last_modified(work_units)) == expected + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + lastModified=TimeStampClass(time=20), + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + # The input and output should align since the source is generating a patch for datasetProperties with the + # lastModified attribute. + # Therefore, `auto_patch_last_modified` should not create any additional patch. 
+ assert list(auto_patch_last_modified(work_units)) == work_units + + +@freeze_time("2023-01-02 00:00:00") +def test_auto_patch_last_modified_last_modified_patch_not_exist(): + mcps = get_sample_mcps() + + patch_builder = create_dataset_props_patch_builder( + dataset_urn="urn:li:dataset:(urn:li:dataPlatform:dbt,abc.foo.bar,PROD)", + dataset_properties=DatasetPropertiesClass( + name="foo", + description="dataset for collection of foo", + ), + ) + + work_units = [ + *list(auto_workunit(mcps)), + *to_patch_work_units(patch_builder), + ] + + expected = [ + *work_units, + *get_auto_generated_wu(), # The output should include an additional patch for the `lastModified` attribute. + ] + + assert list(auto_patch_last_modified(work_units)) == expected diff --git a/metadata-ingestion/tests/unit/test_bigquery_source.py b/metadata-ingestion/tests/unit/test_bigquery_source.py index 8ec19e5bb9e56..38239d150dd6b 100644 --- a/metadata-ingestion/tests/unit/test_bigquery_source.py +++ b/metadata-ingestion/tests/unit/test_bigquery_source.py @@ -814,7 +814,7 @@ def test_table_processing_logic( _ = list( schema_gen.get_tables_for_dataset( - project_id="test-project", dataset_name="test-dataset" + project_id="test-project", dataset=BigqueryDataset("test-dataset") ) ) @@ -890,7 +890,7 @@ def test_table_processing_logic_date_named_tables( _ = list( schema_gen.get_tables_for_dataset( - project_id="test-project", dataset_name="test-dataset" + project_id="test-project", dataset=BigqueryDataset("test-dataset") ) ) diff --git a/metadata-ingestion/tests/unit/test_codegen.py b/metadata-ingestion/tests/unit/test_codegen.py index 0c7d953e194dc..37ac35586950e 100644 --- a/metadata-ingestion/tests/unit/test_codegen.py +++ b/metadata-ingestion/tests/unit/test_codegen.py @@ -6,9 +6,11 @@ import pytest import typing_inspect +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import ( ASPECT_CLASSES, KEY_ASPECTS, + FabricTypeClass, FineGrainedLineageClass, 
MetadataChangeEventClass, OwnershipClass, @@ -157,3 +159,9 @@ def _err(msg: str) -> None: assert ( not errors ), f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"' + + +def test_enum_options(): + # This is mainly a sanity check to ensure that it doesn't do anything too crazy. + env_options = get_enum_options(FabricTypeClass) + assert "PROD" in env_options diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java index c54ba4a222b73..12c59324e3f7c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/JavaGraphClient.java @@ -9,6 +9,7 @@ import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.utils.QueryUtils; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.List; import java.util.stream.Collectors; @@ -19,10 +20,13 @@ @Slf4j public class JavaGraphClient implements GraphClient { - GraphService _graphService; + private final OperationContext systemOpContext; + private final GraphService graphService; - public JavaGraphClient(@Nonnull GraphService graphService) { - this._graphService = graphService; + public JavaGraphClient( + @Nonnull OperationContext systemOpContext, @Nonnull GraphService graphService) { + this.systemOpContext = systemOpContext; + this.graphService = graphService; } /** @@ -43,7 +47,8 @@ public EntityRelationships getRelatedEntities( count = count == null ? 
DEFAULT_PAGE_SIZE : count; RelatedEntitiesResult relatedEntitiesResult = - _graphService.findRelatedEntities( + graphService.findRelatedEntities( + systemOpContext, null, QueryUtils.newFilter("urn", rawUrn), null, @@ -91,7 +96,8 @@ public EntityLineageResult getLineageEntities( @Nullable Integer count, int maxHops, String actor) { - return _graphService.getLineage( + return graphService.getLineage( + systemOpContext, UrnUtils.getUrn(rawUrn), direction, start != null ? start : 0, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index 0dff287080842..f9287ab34cf19 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -64,13 +64,7 @@ public EntityLineageResult getLineage( if (separateSiblings) { return ValidationUtils.validateEntityLineageResult( opContext, - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()), + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops), _entityService); } @@ -81,13 +75,7 @@ public EntityLineageResult getLineage( } EntityLineageResult entityLineage = - _graphService.getLineage( - entityUrn, - direction, - offset, - count, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, entityUrn, direction, offset, count, maxHops); Siblings siblingAspectOfEntity = (Siblings) _entityService.getLatestAspect(opContext, entityUrn, SIBLINGS_ASPECT_NAME); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java new file mode 100644 index 0000000000000..33cb1a7130f14 --- /dev/null +++ 
b/metadata-io/src/main/java/com/linkedin/metadata/graph/SystemGraphRetriever.java @@ -0,0 +1,48 @@ +package com.linkedin.metadata.graph; + +import com.linkedin.metadata.aspect.GraphRetriever; +import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipFilter; +import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; +import java.util.List; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Builder; +import lombok.Setter; + +@Builder +public class SystemGraphRetriever implements GraphRetriever { + @Setter private OperationContext systemOperationContext; + @Nonnull private final GraphService graphService; + + @Nonnull + @Override + public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nullable List sourceTypes, + @Nonnull Filter sourceEntityFilter, + @Nullable List destinationTypes, + @Nonnull Filter destinationEntityFilter, + @Nonnull List relationshipTypes, + @Nonnull RelationshipFilter relationshipFilter, + @Nonnull List sortCriteria, + @Nullable String scrollId, + int count, + @Nullable Long startTimeMillis, + @Nullable Long endTimeMillis) { + return graphService.scrollRelatedEntities( + systemOperationContext, + sourceTypes, + sourceEntityFilter, + destinationTypes, + destinationEntityFilter, + relationshipTypes, + relationshipFilter, + sortCriteria, + scrollId, + count, + startTimeMillis, + endTimeMillis); + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index 6703e07bfd915..1c4a986e24b04 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -19,6 +19,7 @@ import 
com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.query.filter.SortCriterion; +import io.datahubproject.metadata.context.OperationContext; import io.dgraph.DgraphClient; import io.dgraph.DgraphProto.Mutation; import io.dgraph.DgraphProto.NQuad; @@ -366,7 +367,7 @@ protected static String getQueryForRelatedEntities( filters.add( String.format( "%s as var(func: eq(<%s>, \"%s\"))", - sourceFilterName, criterion.getField(), criterion.getValue())); + sourceFilterName, criterion.getField(), criterion.getValues().get(0))); }); } @@ -383,7 +384,7 @@ protected static String getQueryForRelatedEntities( filters.add( String.format( "%s as var(func: eq(<%s>, \"%s\"))", - sourceFilterName, criterion.getField(), criterion.getValue())); + sourceFilterName, criterion.getField(), criterion.getValues().get(0))); }); } @@ -453,6 +454,7 @@ public void removeEdge(final Edge edge) { @Nonnull @Override public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -662,7 +664,7 @@ protected static List getRelatedEntitiesFromResponseData( } @Override - public void removeNode(@Nonnull Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull Urn urn) { String query = String.format("query {\n" + " node as var(func: eq(urn, \"%s\"))\n" + "}", urn); String deletion = "uid(node) * * ."; @@ -679,6 +681,7 @@ public void removeNode(@Nonnull Urn urn) { @Override public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull Urn urn, @Nonnull List relationshipTypes, @Nonnull RelationshipFilter relationshipFilter) { @@ -782,6 +785,7 @@ public void clear() { @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter 
sourceEntityFilter, @Nullable List destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 50e5aa6ba893d..8c7f0e3256cf8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -14,6 +14,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.IntegerArray; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.LineageDirection; @@ -34,14 +35,17 @@ import com.linkedin.metadata.utils.DataPlatformInstanceUtils; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.metadata.utils.metrics.MetricUtils; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ConcurrentHashMap; @@ -105,8 +109,7 @@ public class ESGraphQueryDAO { static final String GROUP_BY_DESTINATION_AGG = "group_by_destination"; static final String TOP_DOCUMENTS_AGG = "top_documents"; - @Nonnull - public static void addFilterToQueryBuilder( + private static void addFilterToQueryBuilder( @Nonnull Filter filter, @Nullable String node, BoolQueryBuilder rootQuery) { BoolQueryBuilder orQuery = new BoolQueryBuilder(); for (ConjunctiveCriterion conjunction : filter.getOr()) { @@ -120,9 +123,9 @@ 
public static void addFilterToQueryBuilder( criterionArray.forEach( criterion -> andQuery.filter( - QueryBuilders.termQuery( + QueryBuilders.termsQuery( (node == null ? "" : node + ".") + criterion.getField(), - criterion.getValue()))); + criterion.getValues()))); orQuery.should(andQuery); } rootQuery.filter(orQuery); @@ -231,7 +234,7 @@ private SearchResponse executeGroupByLineageSearchQuery( } } - private BoolQueryBuilder getAggregationFilter( + private static BoolQueryBuilder getAggregationFilter( Pair pair, RelationshipDirection direction) { BoolQueryBuilder subFilter = QueryBuilders.boolQuery(); TermQueryBuilder relationshipTypeTerm = @@ -258,6 +261,7 @@ private BoolQueryBuilder getAggregationFilter( } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -268,6 +272,8 @@ public SearchResponse getSearchResponse( final int count) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -279,6 +285,8 @@ public SearchResponse getSearchResponse( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -286,6 +294,8 @@ public static BoolQueryBuilder buildQuery( @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { return buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -296,6 +306,8 @@ public static BoolQueryBuilder buildQuery( } public static BoolQueryBuilder buildQuery( + @Nonnull final OperationContext opContext, + @Nonnull final GraphQueryConfiguration graphQueryConfiguration, @Nullable final List sourceTypes, @Nonnull final 
Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -345,19 +357,23 @@ public static BoolQueryBuilder buildQuery( if (lifecycleOwner != null) { finalQuery.filter(QueryBuilders.termQuery(EDGE_FIELD_LIFECYCLE_OWNER, lifecycleOwner)); } + if (!Optional.ofNullable(opContext.getSearchContext().getSearchFlags().isIncludeSoftDeleted()) + .orElse(false)) { + applyExcludeSoftDelete(graphQueryConfiguration, finalQuery); + } return finalQuery; } @WithSpan public LineageResponse getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { + int maxHops) { Map result = new HashMap<>(); long currentTime = System.currentTimeMillis(); long remainingTime = graphQueryConfiguration.getTimeoutSeconds() * 1000; @@ -388,6 +404,7 @@ public LineageResponse getLineage( // Do one hop on the lineage graph Stream intermediateStream = processOneHopLineage( + opContext, currentLevel, remainingTime, direction, @@ -398,7 +415,6 @@ public LineageResponse getLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); currentLevel = intermediateStream.collect(Collectors.toList()); currentTime = System.currentTimeMillis(); @@ -421,6 +437,7 @@ public LineageResponse getLineage( } private Stream processOneHopLineage( + @Nonnull OperationContext opContext, List currentLevel, Long remainingTime, LineageDirection direction, @@ -431,7 +448,6 @@ private Stream processOneHopLineage( Map existingPaths, boolean exploreMultiplePaths, Map result, - LineageFlags lineageFlags, int i) { // Do one hop on the lineage graph @@ -439,6 +455,7 @@ private Stream processOneHopLineage( int remainingHops = maxHops - numHops; List oneHopRelationships = getLineageRelationshipsInBatches( + opContext, currentLevel, direction, graphFilters, @@ -448,8 +465,10 @@ private Stream processOneHopLineage( remainingHops, remainingTime, 
existingPaths, - exploreMultiplePaths, - lineageFlags); + exploreMultiplePaths); + + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); + for (LineageRelationship oneHopRelnship : oneHopRelationships) { if (result.containsKey(oneHopRelnship.getEntity())) { log.debug("Urn encountered again during graph walk {}", oneHopRelnship.getEntity()); @@ -487,6 +506,7 @@ private Stream processOneHopLineage( if (!additionalCurrentLevel.isEmpty()) { Stream ignoreAsHopUrns = processOneHopLineage( + opContext, additionalCurrentLevel, remainingTime, direction, @@ -497,7 +517,6 @@ private Stream processOneHopLineage( existingPaths, exploreMultiplePaths, result, - lineageFlags, i); intermediateStream = Stream.concat(intermediateStream, ignoreAsHopUrns); } @@ -560,6 +579,7 @@ private LineageRelationship mergeLineageRelationships( // Get 1-hop lineage relationships asynchronously in batches with timeout @WithSpan public List getLineageRelationshipsInBatches( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -569,8 +589,7 @@ public List getLineageRelationshipsInBatches( int remainingHops, long remainingTime, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { List> batches = Lists.partition(entityUrns, graphQueryConfiguration.getBatchSize()); return ConcurrencyUtils.getAllCompleted( batches.stream() @@ -579,6 +598,7 @@ public List getLineageRelationshipsInBatches( CompletableFuture.supplyAsync( () -> getLineageRelationships( + opContext, batchUrns, direction, graphFilters, @@ -587,8 +607,7 @@ public List getLineageRelationshipsInBatches( numHops, remainingHops, existingPaths, - exploreMultiplePaths, - lineageFlags))) + exploreMultiplePaths))) .collect(Collectors.toList()), remainingTime, TimeUnit.MILLISECONDS) @@ -600,6 +619,7 @@ public List getLineageRelationshipsInBatches( // Get 1-hop lineage 
relationships @WithSpan private List getLineageRelationships( + @Nonnull final OperationContext opContext, @Nonnull List entityUrns, @Nonnull LineageDirection direction, GraphFilters graphFilters, @@ -608,8 +628,8 @@ private List getLineageRelationships( int numHops, int remainingHops, Map existingPaths, - boolean exploreMultiplePaths, - @Nullable LineageFlags lineageFlags) { + boolean exploreMultiplePaths) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); Map> urnsPerEntityType = entityUrns.stream().collect(Collectors.groupingBy(Urn::getEntityType)); Map> edgesPerEntityType = @@ -628,7 +648,7 @@ private List getLineageRelationships( .collect(Collectors.toSet()); QueryBuilder finalQuery = - getLineageQuery(urnsPerEntityType, edgesPerEntityType, graphFilters, lineageFlags); + getLineageQuery(opContext, urnsPerEntityType, edgesPerEntityType, graphFilters); SearchResponse response; if (lineageFlags != null && lineageFlags.getEntitiesExploredPerHopLimit() != null) { response = @@ -660,11 +680,12 @@ private List getLineageRelationships( } @VisibleForTesting - public QueryBuilder getLineageQuery( + public static QueryBuilder getLineageQuery( + @Nonnull OperationContext opContext, @Nonnull Map> urnsPerEntityType, @Nonnull Map> edgesPerEntityType, - @Nonnull GraphFilters graphFilters, - @Nullable LineageFlags lineageFlags) { + @Nonnull GraphFilters graphFilters) { + final LineageFlags lineageFlags = opContext.getSearchContext().getLineageFlags(); BoolQueryBuilder entityTypeQueries = QueryBuilders.boolQuery(); // Get all relation types relevant to the set of urns to hop from urnsPerEntityType.forEach( @@ -690,7 +711,7 @@ public QueryBuilder getLineageQuery( && lineageFlags.getStartTimeMillis() != null && lineageFlags.getEndTimeMillis() != null) { finalQuery.filter( - TimeFilterUtils.getEdgeTimeFilterQuery( + GraphFilterUtils.getEdgeTimeFilterQuery( lineageFlags.getStartTimeMillis(), lineageFlags.getEndTimeMillis())); } else { 
log.debug("Empty time filter range provided. Skipping application of time filters"); @@ -700,7 +721,7 @@ public QueryBuilder getLineageQuery( } @VisibleForTesting - public QueryBuilder getLineageQueryForEntityType( + static QueryBuilder getLineageQueryForEntityType( @Nonnull List urns, @Nonnull List lineageEdges, @Nonnull GraphFilters graphFilters) { @@ -769,7 +790,7 @@ private void addViaNodeBoostQuery(final SearchSourceBuilder sourceBuilder) { * the Graph Store. */ @VisibleForTesting - public static void addEdgeToPaths( + static void addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, @Nonnull final Urn childUrn) { @@ -782,7 +803,7 @@ private static boolean containsCycle(final UrnArray path) { return (path.size() != urnSet.size()); } - public static boolean addEdgeToPaths( + static boolean addEdgeToPaths( @Nonnull final Map existingPaths, @Nonnull final Urn parentUrn, final Urn viaUrn, @@ -1317,6 +1338,7 @@ public static class LineageResponse { } public SearchResponse getSearchResponse( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nullable final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -1329,6 +1351,8 @@ public SearchResponse getSearchResponse( BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceTypes, sourceEntityFilter, destinationTypes, @@ -1371,4 +1395,17 @@ private SearchResponse executeScrollSearchQuery( throw new ESQueryException("Search query failed:", e); } } + + private static void applyExcludeSoftDelete( + GraphQueryConfiguration graphQueryConfiguration, BoolQueryBuilder boolQueryBuilder) { + if (graphQueryConfiguration.isGraphStatusEnabled()) { + Arrays.stream(EdgeUrnType.values()) + .map( + edgeUrnType -> + QueryBuilders.termsQuery( + GraphFilterUtils.getUrnStatusFieldName(edgeUrnType), "true")) + .filter(statusQuery -> !boolQueryBuilder.mustNot().contains(statusQuery)) + .forEach(boolQueryBuilder::mustNot); + } + } } diff 
--git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java index ddbd00f90ef68..ba481bdfa109f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAO.java @@ -4,10 +4,12 @@ import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.INDEX_NAME; import com.google.common.collect.ImmutableList; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipFilter; import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import io.datahubproject.metadata.context.OperationContext; import java.util.List; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -17,7 +19,9 @@ import org.opensearch.action.update.UpdateRequest; import org.opensearch.common.xcontent.XContentType; import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; +import org.opensearch.script.Script; @Slf4j @RequiredArgsConstructor @@ -25,8 +29,7 @@ public class ESGraphWriteDAO { private final IndexConvention indexConvention; private final ESBulkProcessor bulkProcessor; private final int numRetries; - - private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; + private final GraphQueryConfiguration graphQueryConfiguration; /** * Updates or inserts the given search document. 
@@ -56,6 +59,7 @@ public void deleteDocument(@Nonnull String docId) { } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -64,6 +68,8 @@ public BulkByScrollResponse deleteByQuery( @Nonnull final RelationshipFilter relationshipFilter) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? ImmutableList.of() : ImmutableList.of(destinationType), @@ -77,6 +83,7 @@ public BulkByScrollResponse deleteByQuery( } public BulkByScrollResponse deleteByQuery( + @Nonnull final OperationContext opContext, @Nullable final String sourceType, @Nonnull final Filter sourceEntityFilter, @Nullable final String destinationType, @@ -86,6 +93,8 @@ public BulkByScrollResponse deleteByQuery( String lifecycleOwner) { BoolQueryBuilder finalQuery = buildQuery( + opContext, + graphQueryConfiguration, sourceType == null ? ImmutableList.of() : ImmutableList.of(sourceType), sourceEntityFilter, destinationType == null ? 
ImmutableList.of() : ImmutableList.of(destinationType), @@ -98,4 +107,12 @@ public BulkByScrollResponse deleteByQuery( .deleteByQuery(finalQuery, indexConvention.getIndexName(INDEX_NAME)) .orElse(null); } + + @Nullable + public BulkByScrollResponse updateByQuery( + @Nonnull Script script, @Nonnull final QueryBuilder query) { + return bulkProcessor + .updateByQuery(script, query, indexConvention.getIndexName(INDEX_NAME)) + .orElse(null); + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index e1532ea4e26c0..1068fae9478e1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -1,12 +1,16 @@ package com.linkedin.metadata.graph.elastic; import static com.linkedin.metadata.aspect.models.graph.Edge.*; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusFieldName; +import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusQuery; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; @@ -17,7 +21,6 @@ import com.linkedin.metadata.graph.LineageRelationshipArray; import com.linkedin.metadata.graph.RelatedEntitiesResult; import com.linkedin.metadata.models.registry.LineageRegistry; -import 
com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -35,6 +38,7 @@ import com.linkedin.metadata.utils.elasticsearch.IndexConvention; import com.linkedin.structured.StructuredPropertyDefinition; import com.linkedin.util.Pair; +import io.datahubproject.metadata.context.OperationContext; import io.opentelemetry.extension.annotations.WithSpan; import java.io.IOException; import java.util.ArrayList; @@ -51,13 +55,15 @@ import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.opensearch.action.search.SearchResponse; +import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.opensearch.script.ScriptType; import org.opensearch.search.SearchHit; @Slf4j @RequiredArgsConstructor public class ElasticSearchGraphService implements GraphService, ElasticSearchIndexed { - private final LineageRegistry _lineageRegistry; private final ESBulkProcessor _esBulkProcessor; private final IndexConvention _indexConvention; @@ -68,7 +74,7 @@ public class ElasticSearchGraphService implements GraphService, ElasticSearchInd public static final String INDEX_NAME = "graph_service_v1"; private static final Map EMPTY_HASH = new HashMap<>(); - private String toDocument(@Nonnull final Edge edge) { + private static String toDocument(@Nonnull final Edge edge) { final ObjectNode searchDocument = JsonNodeFactory.instance.objectNode(); final ObjectNode sourceObject = JsonNodeFactory.instance.objectNode(); @@ -114,6 +120,18 @@ private String toDocument(@Nonnull final Edge edge) { if (edge.getVia() != null) { searchDocument.put(EDGE_FIELD_VIA, edge.getVia().toString()); } + if (edge.getViaStatus() != null) { + searchDocument.put(EDGE_FIELD_VIA_STATUS, edge.getViaStatus()); + } + if (edge.getLifecycleOwnerStatus() != 
null) { + searchDocument.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, edge.getLifecycleOwnerStatus()); + } + if (edge.getSourceStatus() != null) { + searchDocument.put(EDGE_SOURCE_STATUS, edge.getSourceStatus()); + } + if (edge.getDestinationStatus() != null) { + searchDocument.put(EDGE_DESTINATION_STATUS, edge.getDestinationStatus()); + } log.debug("Search doc for write {}", searchDocument); return searchDocument.toString(); @@ -142,8 +160,10 @@ public void removeEdge(@Nonnull final Edge edge) { _graphWriteDAO.deleteDocument(docId); } + @Override @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -161,6 +181,7 @@ public RelatedEntitiesResult findRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, @@ -188,35 +209,16 @@ public RelatedEntitiesResult findRelatedEntities( @Override @Deprecated public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - ESGraphQueryDAO.LineageResponse lineageResponse = - _graphReadDAO.getLineage(entityUrn, direction, graphFilters, offset, count, maxHops, null); - return new EntityLineageResult() - .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) - .setStart(offset) - .setCount(count) - .setTotal(lineageResponse.getTotal()); - } - - @Nonnull - @WithSpan - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { ESGraphQueryDAO.LineageResponse lineageResponse = _graphReadDAO.getLineage( - entityUrn, direction, graphFilters, offset, 
count, maxHops, lineageFlags); + opContext, entityUrn, direction, graphFilters, offset, count, maxHops); return new EntityLineageResult() .setRelationships(new LineageRelationshipArray(lineageResponse.getLineageRelationships())) .setStart(offset) @@ -224,13 +226,10 @@ public EntityLineageResult getLineage( .setTotal(lineageResponse.getTotal()); } - private Filter createUrnFilter(@Nonnull final Urn urn) { + private static Filter createUrnFilter(@Nonnull final Urn urn) { Filter filter = new Filter(); CriterionArray criterionArray = new CriterionArray(); - Criterion criterion = new Criterion(); - criterion.setCondition(Condition.EQUAL); - criterion.setField("urn"); - criterion.setValue(urn.toString()); + Criterion criterion = buildCriterion("urn", Condition.EQUAL, urn.toString()); criterionArray.add(criterion); filter.setOr( new ConjunctiveCriterionArray( @@ -239,7 +238,7 @@ private Filter createUrnFilter(@Nonnull final Urn urn) { return filter; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { Filter urnFilter = createUrnFilter(urn); Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); List relationshipTypes = new ArrayList<>(); @@ -250,19 +249,47 @@ public void removeNode(@Nonnull final Urn urn) { new RelationshipFilter().setDirection(RelationshipDirection.INCOMING); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, outgoingFilter); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, incomingFilter); // Delete all edges where this entity is a lifecycle owner _graphWriteDAO.deleteByQuery( - null, emptyFilter, null, emptyFilter, relationshipTypes, incomingFilter, urn.toString()); + opContext, + null, + emptyFilter, 
+ null, + emptyFilter, + relationshipTypes, + incomingFilter, + urn.toString()); + } - return; + @Override + public void setEdgeStatus( + @Nonnull Urn urn, boolean removed, @Nonnull EdgeUrnType... edgeUrnTypes) { + + for (EdgeUrnType edgeUrnType : edgeUrnTypes) { + // Update the graph status fields per urn type which do not match target state + QueryBuilder negativeQuery = getUrnStatusQuery(edgeUrnType, urn, !removed); + + // Set up the script to update the boolean field + String scriptContent = + "ctx._source." + getUrnStatusFieldName(edgeUrnType) + " = params.newValue"; + Script script = + new Script( + ScriptType.INLINE, + "painless", + scriptContent, + Collections.singletonMap("newValue", removed)); + + _graphWriteDAO.updateByQuery(script, negativeQuery); + } } public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -271,7 +298,7 @@ public void removeEdgesFromNode( Filter emptyFilter = new Filter().setOr(new ConjunctiveCriterionArray()); _graphWriteDAO.deleteByQuery( - null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); + opContext, null, urnFilter, null, emptyFilter, relationshipTypes, relationshipFilter); } @Override @@ -308,8 +335,8 @@ public boolean supportsMultiHop() { } @Nonnull - @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable List sourceTypes, @Nullable Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -326,6 +353,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( SearchResponse response = _graphReadDAO.getSearchResponse( + opContext, sourceTypes, sourceEntityFilter, destinationTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java similarity index 67% rename 
from metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java rename to metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java index 7ee84ce834cfa..982bcae9b5fd9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/TimeFilterUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphFilterUtils.java @@ -1,14 +1,81 @@ package com.linkedin.metadata.graph.elastic; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_DESTINATION_URN_FIELD; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_LIFECYCLE_OWNER_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_FIELD_VIA_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_STATUS; +import static com.linkedin.metadata.aspect.models.graph.Edge.EDGE_SOURCE_URN_FIELD; import static com.linkedin.metadata.graph.elastic.ESGraphQueryDAO.*; +import com.linkedin.common.urn.Urn; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.query.QueryBuilders; @Slf4j -public class TimeFilterUtils { +public class GraphFilterUtils { + + public static QueryBuilder getUrnStatusQuery( + @Nonnull EdgeUrnType edgeUrnType, @Nonnull final Urn urn, @Nonnull Boolean removed) { + + final String urnField = getUrnFieldName(edgeUrnType); + final String statusField = getUrnStatusFieldName(edgeUrnType); + + // Create a BoolQueryBuilder + BoolQueryBuilder finalQuery = QueryBuilders.boolQuery(); + + // urn filter + 
finalQuery.filter(QueryBuilders.termQuery(urnField, urn.toString())); + + // status filter + if (removed) { + finalQuery.filter(QueryBuilders.termQuery(statusField, removed.toString())); + } else { + finalQuery.minimumShouldMatch(1); + finalQuery.should(QueryBuilders.termQuery(statusField, removed.toString())); + finalQuery.should(QueryBuilders.boolQuery().mustNot(QueryBuilders.existsQuery(statusField))); + } + + return finalQuery; + } + + public static String getUrnStatusFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_STATUS; + case DESTINATION: + return EDGE_DESTINATION_STATUS; + case VIA: + return EDGE_FIELD_VIA_STATUS; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER_STATUS; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. Found: %s", edgeUrnType)); + } + } + + public static String getUrnFieldName(EdgeUrnType edgeUrnType) { + switch (edgeUrnType) { + case SOURCE: + return EDGE_SOURCE_URN_FIELD; + case DESTINATION: + return EDGE_DESTINATION_URN_FIELD; + case VIA: + return EDGE_FIELD_VIA; + case LIFECYCLE_OWNER: + return EDGE_FIELD_LIFECYCLE_OWNER; + default: + throw new IllegalStateException( + String.format("Unhandled EdgeUrnType. 
Found: %s", edgeUrnType)); + } + } /** * In order to filter for edges that fall into a specific filter window, we perform a @@ -141,5 +208,5 @@ private static QueryBuilder buildManualLineageFilter() { return QueryBuilders.termQuery(String.format("%s.%s", PROPERTIES, SOURCE), UI); } - private TimeFilterUtils() {} + private GraphFilterUtils() {} } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java index 7a6c7701fde5f..164bf3ad17d8c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/GraphRelationshipMappingsBuilder.java @@ -20,6 +20,8 @@ public static Map getMappings() { mappings.put(EDGE_FIELD_PROPERTIES, getMappingsForEdgeProperties()); mappings.put(EDGE_FIELD_LIFECYCLE_OWNER, getMappingsForKeyword()); mappings.put(EDGE_FIELD_VIA, getMappingsForKeyword()); + mappings.put(EDGE_FIELD_LIFECYCLE_OWNER_STATUS, getMappingsForBoolean()); + mappings.put(EDGE_FIELD_VIA_STATUS, getMappingsForBoolean()); return ImmutableMap.of("properties", mappings); } @@ -27,12 +29,17 @@ private static Map getMappingsForKeyword() { return ImmutableMap.builder().put("type", "keyword").build(); } + private static Map getMappingsForBoolean() { + return ImmutableMap.builder().put("type", "boolean").build(); + } + private static Map getMappingsForEntity() { Map mappings = ImmutableMap.builder() .put("urn", getMappingsForKeyword()) .put("entityType", getMappingsForKeyword()) + .put("removed", getMappingsForBoolean()) .build(); return ImmutableMap.of("properties", mappings); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 9fe9c242fe48c..ef748ebd23278 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -32,7 +32,7 @@ import com.linkedin.metadata.search.elasticsearch.query.request.SearchAfterWrapper; import com.linkedin.metadata.utils.metrics.MetricUtils; import com.linkedin.util.Pair; -import io.opentelemetry.extension.annotations.WithSpan; +import io.datahubproject.metadata.context.OperationContext; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collections; @@ -250,33 +250,24 @@ public void removeEdge(final Edge edge) { } @Nonnull - @WithSpan @Override public EntityLineageResult getLineage( + @Nonnull final OperationContext opContext, @Nonnull Urn entityUrn, @Nonnull LineageDirection direction, GraphFilters graphFilters, int offset, int count, int maxHops) { - return getLineage(entityUrn, direction, graphFilters, offset, count, maxHops, null); - } - - @Nonnull - @Override - public EntityLineageResult getLineage( - @Nonnull Urn entityUrn, - @Nonnull LineageDirection direction, - GraphFilters graphFilters, - int offset, - int count, - int maxHops, - @Nullable LineageFlags lineageFlags) { log.debug(String.format("Neo4j getLineage maxHops = %d", maxHops)); final var statementAndParams = generateLineageStatementAndParameters( - entityUrn, direction, graphFilters, maxHops, lineageFlags); + entityUrn, + direction, + graphFilters, + maxHops, + opContext.getSearchContext().getLineageFlags()); final var statement = statementAndParams.getFirst(); final var parameters = statementAndParams.getSecond(); @@ -457,6 +448,7 @@ private Pair> generateLineageStatementAndParameters( @Nonnull public RelatedEntitiesResult findRelatedEntities( + @Nonnull final OperationContext opContext, @Nullable final List sourceTypes, @Nonnull final Filter sourceEntityFilter, @Nullable final List destinationTypes, @@ -497,7 +489,7 @@ public RelatedEntitiesResult findRelatedEntities( // Create 
a URN from the String. Only proceed if srcCriteria is not null or empty if (StringUtils.isNotEmpty(srcCriteria)) { final String urnValue = - sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValues().get(0).toString(); try { final Urn urn = Urn.createFromString(urnValue); srcNodeLabel = urn.getEntityType(); @@ -600,7 +592,7 @@ private String computeEntityTypeWhereClause( return whereClause; } - public void removeNode(@Nonnull final Urn urn) { + public void removeNode(@Nonnull final OperationContext opContext, @Nonnull final Urn urn) { log.debug(String.format("Removing Neo4j node with urn: %s", urn)); final String srcNodeLabel = urn.getEntityType(); @@ -627,6 +619,7 @@ public void removeNode(@Nonnull final Urn urn) { * @param relationshipFilter Query relationship filter */ public void removeEdgesFromNode( + @Nonnull final OperationContext opContext, @Nonnull final Urn urn, @Nonnull final List relationshipTypes, @Nonnull final RelationshipFilter relationshipFilter) { @@ -847,7 +840,8 @@ private static String criterionToString(@Nonnull CriterionArray criterionArray) final StringJoiner joiner = new StringJoiner(",", "{", "}"); criterionArray.forEach( - criterion -> joiner.add(toCriterionString(criterion.getField(), criterion.getValue()))); + criterion -> + joiner.add(toCriterionString(criterion.getField(), criterion.getValues().get(0)))); return joiner.length() <= 2 ? "" : joiner.toString(); } @@ -915,6 +909,7 @@ private boolean isSourceDestReversed( @Nonnull @Override public RelatedEntitiesScrollResult scrollRelatedEntities( + @Nonnull OperationContext opContext, @Nullable List sourceTypes, @Nonnull Filter sourceEntityFilter, @Nullable List destinationTypes, @@ -949,7 +944,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( // Create a URN from the String. 
Only proceed if srcCriteria is not null or empty if (StringUtils.isNotEmpty(srcCriteria)) { final String urnValue = - sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValues().get(0).toString(); try { final Urn urn = Urn.createFromString(urnValue); srcNodeLabel = urn.getEntityType(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index 435731a3f9d04..67ebdf8882b80 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -26,7 +26,6 @@ import com.linkedin.metadata.query.GroupingSpec; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -173,13 +172,7 @@ public LineageSearchResult searchAcrossLineage( if (cachedLineageResult == null || finalOpContext.getSearchContext().getSearchFlags().isSkipCache()) { lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { try { cache.put( @@ -210,12 +203,7 @@ public LineageSearchResult searchAcrossLineage( // we have to refetch EntityLineageResult result = _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - finalMaxHops, - opContext.getSearchContext().getLineageFlags()); + opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, finalMaxHops); cache.put(cacheKey, result); log.debug("Refilled 
Cached lineage entry for: {}.", sourceUrn); } else { @@ -369,14 +357,14 @@ LineageSearchResult getLightningSearchResult( .map(ConjunctiveCriterion::getAnd) .flatMap(CriterionArray::stream) .filter(criterion -> "platform".equals(criterion.getField())) - .map(Criterion::getValue) + .flatMap(criterion -> criterion.getValues().stream()) .collect(Collectors.toSet()); originCriteriaValues = inputFilters.getOr().stream() .map(ConjunctiveCriterion::getAnd) .flatMap(CriterionArray::stream) .filter(criterion -> "origin".equals(criterion.getField())) - .map(Criterion::getValue) + .flatMap(criterion -> criterion.getValues().stream()) .collect(Collectors.toSet()); } boolean isNotFiltered = @@ -770,13 +758,7 @@ public LineageScrollResult scrollAcrossLineage( if (cachedLineageResult == null) { maxHops = maxHops != null ? maxHops : 1000; lineageResult = - _graphService.getLineage( - sourceUrn, - direction, - 0, - MAX_RELATIONSHIPS, - maxHops, - opContext.getSearchContext().getLineageFlags()); + _graphService.getLineage(opContext, sourceUrn, direction, 0, MAX_RELATIONSHIPS, maxHops); if (cacheEnabled) { cache.put( cacheKey, new CachedEntityLineageResult(lineageResult, System.currentTimeMillis())); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 2d04e99774050..6de79b6c4b181 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -87,6 +87,8 @@ public class ESIndexBuilder { @Getter private final GitVersion gitVersion; + @Getter private final int maxReindexHours; + private static final RequestOptions REQUEST_OPTIONS = RequestOptions.DEFAULT.toBuilder() .setRequestConfig(RequestConfig.custom().setSocketTimeout(180 * 1000).build()) @@ -106,6 
+108,34 @@ public ESIndexBuilder( boolean enableStructuredPropertiesReindex, ElasticSearchConfiguration elasticSearchConfiguration, GitVersion gitVersion) { + this( + searchClient, + numShards, + numReplicas, + numRetries, + refreshIntervalSeconds, + indexSettingOverrides, + enableIndexSettingsReindex, + enableIndexMappingsReindex, + enableStructuredPropertiesReindex, + elasticSearchConfiguration, + gitVersion, + 0); + } + + public ESIndexBuilder( + RestHighLevelClient searchClient, + int numShards, + int numReplicas, + int numRetries, + int refreshIntervalSeconds, + Map> indexSettingOverrides, + boolean enableIndexSettingsReindex, + boolean enableIndexMappingsReindex, + boolean enableStructuredPropertiesReindex, + ElasticSearchConfiguration elasticSearchConfiguration, + GitVersion gitVersion, + int maxReindexHours) { this._searchClient = searchClient; this.numShards = numShards; this.numReplicas = numReplicas; @@ -117,6 +147,7 @@ public ESIndexBuilder( this.elasticSearchConfiguration = elasticSearchConfiguration; this.enableStructuredPropertiesReindex = enableStructuredPropertiesReindex; this.gitVersion = gitVersion; + this.maxReindexHours = maxReindexHours; RetryConfig config = RetryConfig.custom() @@ -348,10 +379,10 @@ private static String getNextIndexName(String base, long startTime) { private void reindex(ReindexConfig indexState) throws Throwable { final long startTime = System.currentTimeMillis(); - final int maxReindexHours = 8; final long initialCheckIntervalMilli = 1000; final long finalCheckIntervalMilli = 60000; - final long timeoutAt = startTime + (1000 * 60 * 60 * maxReindexHours); + final long timeoutAt = + maxReindexHours > 0 ? 
startTime + (1000L * 60 * 60 * maxReindexHours) : Long.MAX_VALUE; String tempIndexName = getNextIndexName(indexState.name(), startTime); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index f1c42a1d277da..61bba11098fae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -125,7 +125,7 @@ public BrowseResult browse( @Nullable Filter filters, int from, int size) { - final Map requestMap = SearchUtils.getRequestMap(filters); + final Map> requestMap = SearchUtils.getRequestMap(filters); final OperationContext finalOpContext = opContext.withSearchFlags( @@ -213,7 +213,7 @@ protected SearchRequest constructGroupsSearchRequest( @Nonnull OperationContext opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap) { + @Nonnull Map> requestMap) { final SearchRequest searchRequest = new SearchRequest(indexName); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(0); @@ -235,7 +235,7 @@ protected SearchRequest constructGroupsSearchRequest( private QueryBuilder buildQueryString( @Nonnull OperationContext opContext, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, boolean isGroupQuery) { final int browseDepthVal = getPathDepth(path); @@ -253,7 +253,7 @@ private QueryBuilder buildQueryString( queryBuilder.filter(QueryBuilders.termQuery(BROWSE_PATH_DEPTH, browseDepthVal)); } - requestMap.forEach((field, val) -> queryBuilder.filter(QueryBuilders.termQuery(field, val))); + requestMap.forEach((field, vals) -> queryBuilder.filter(QueryBuilders.termsQuery(field, vals))); return queryBuilder; } @@ -272,7 +272,7 @@ SearchRequest constructEntitiesSearchRequest( @Nonnull OperationContext 
opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, int from, int size) { final SearchRequest searchRequest = new SearchRequest(indexName); @@ -302,7 +302,7 @@ SearchRequest constructEntitiesSearchRequest( @Nonnull OperationContext opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, @Nullable Object[] sort, @Nullable String pitId, @Nonnull String keepAlive, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java index 800d59bacc1d8..367705d369c7c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -1,14 +1,15 @@ package com.linkedin.metadata.search.elasticsearch.query.filter; import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; -import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.utils.QueryUtils; import io.datahubproject.metadata.context.OperationContext; @@ -209,7 +210,7 @@ private static void scrollGraph( graphRetriever.consumeRelatedEntities( consumer, entityTypes, - 
QueryUtils.newDisjunctiveFilter(newCriterion("urn", queryUrnStrs)), + QueryUtils.newDisjunctiveFilter(buildCriterion("urn", Condition.EQUAL, queryUrnStrs)), entityTypes, EMPTY_FILTER, relationshipTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index fa2eef964e006..39f69ed1716ab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -421,8 +421,6 @@ private void addCriterionFiltersToAggregationMetadata( .forEach( value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); - } else { - addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); } } else { /* diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java index fc29aca411784..63a9c731a2d39 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/update/ESBulkProcessor.java @@ -23,6 +23,8 @@ import org.opensearch.index.query.QueryBuilder; import org.opensearch.index.reindex.BulkByScrollResponse; import org.opensearch.index.reindex.DeleteByQueryRequest; +import org.opensearch.index.reindex.UpdateByQueryRequest; +import org.opensearch.script.Script; @Slf4j @Builder(builderMethodName = "hiddenBuilder") @@ -30,6 +32,7 @@ public class ESBulkProcessor implements Closeable { private static final String ES_WRITES_METRIC = "num_elasticSearch_writes"; private static final String ES_BATCHES_METRIC = 
"num_elasticSearch_batches_submitted"; private static final String ES_DELETE_EXCEPTION_METRIC = "delete_by_query"; + private static final String ES_UPDATE_EXCEPTION_METRIC = "update_by_query"; private static final String ES_SUBMIT_DELETE_EXCEPTION_METRIC = "submit_delete_by_query_task"; private static final String ES_SUBMIT_REINDEX_METRIC = "reindex_submit"; private static final String ES_REINDEX_SUCCESS_METRIC = "reindex_success"; @@ -97,6 +100,26 @@ public Optional deleteByQuery( return deleteByQuery(queryBuilder, refresh, bulkRequestsLimit, defaultTimeout, indices); } + public Optional updateByQuery( + Script script, QueryBuilder queryBuilder, String... indices) { + // Create an UpdateByQueryRequest + UpdateByQueryRequest updateByQuery = new UpdateByQueryRequest(indices); + updateByQuery.setQuery(queryBuilder); + updateByQuery.setScript(script); + + try { + final BulkByScrollResponse updateResponse = + searchClient.updateByQuery(updateByQuery, RequestOptions.DEFAULT); + MetricUtils.counter(this.getClass(), ES_WRITES_METRIC).inc(updateResponse.getTotal()); + return Optional.of(updateResponse); + } catch (Exception e) { + log.error("ERROR: Failed to update by query. See stacktrace for a more detailed error:", e); + MetricUtils.exceptionCounter(ESBulkProcessor.class, ES_UPDATE_EXCEPTION_METRIC, e); + } + + return Optional.empty(); + } + public Optional deleteByQuery( QueryBuilder queryBuilder, boolean refresh, int limit, TimeValue timeout, String... 
indices) { DeleteByQueryRequest deleteByQueryRequest = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 78bb8cb1e41ae..ace7fa2bc197c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -8,7 +8,7 @@ import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; -import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.aspect.AspectRetriever; @@ -25,7 +25,6 @@ import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; import io.datahubproject.metadata.context.OperationContext; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -177,9 +176,7 @@ public static BoolQueryBuilder buildFilterQuery( .getCriteria() .forEach( criterion -> { - if (!criterion.getValue().trim().isEmpty() - || criterion.hasValues() - || criterion.getCondition() == Condition.IS_NULL) { + if (criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( getQueryBuilderFromCriterion( criterion, @@ -209,7 +206,6 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( .forEach( criterion -> { if (Set.of(Condition.EXISTS, Condition.IS_NULL).contains(criterion.getCondition()) - || (criterion.hasValue() && !criterion.getValue().trim().isEmpty()) || 
criterion.hasValues()) { if (!criterion.isNegated()) { // `filter` instead of `must` (enables caching and bypasses scoring) @@ -539,20 +535,13 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { - Criterion criterionToQuery = new Criterion(); - criterionToQuery.setCondition(criterion.getCondition()); - criterionToQuery.setNegated(criterion.isNegated()); - if (criterion.hasValues()) { - criterionToQuery.setValues(criterion.getValues()); - } - if (criterion.hasValue()) { - criterionToQuery.setValue(criterion.getValue()); - } - criterionToQuery.setField( - toKeywordField(field, isTimeseries, opContext.getAspectRetriever())); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, + buildCriterion( + toKeywordField(field, isTimeseries, opContext.getAspectRetriever()), + criterion.getCondition(), + criterion.isNegated(), + criterion.getValues()), isTimeseries, searchableFieldTypes, null, @@ -583,7 +572,7 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( return QueryBuilders.boolQuery() .must(QueryBuilders.existsQuery(fieldName)) .queryName(queryName != null ? 
queryName : fieldName); - } else if (criterion.hasValues() || criterion.hasValue()) { + } else if (criterion.hasValues()) { if (condition == Condition.EQUAL) { return buildEqualsConditionFromCriterion( fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) @@ -643,21 +632,6 @@ private static QueryBuilder buildWildcardQueryWithMultipleValues( return boolQuery; } - private static QueryBuilder buildWildcardQueryWithSingleValue( - @Nonnull final String fieldName, - @Nonnull final Criterion criterion, - final boolean isTimeseries, - @Nullable String queryName, - @Nonnull AspectRetriever aspectRetriever, - String wildcardPattern) { - return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), - String.format( - wildcardPattern, ESUtils.escapeReservedCharacters(criterion.getValue().trim()))) - .queryName(queryName != null ? queryName : fieldName) - .caseInsensitive(true); - } - private static QueryBuilder buildContainsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, @@ -665,11 +639,7 @@ private static QueryBuilder buildContainsConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s*"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s*"); } @@ -680,11 +650,7 @@ private static QueryBuilder buildStartsWithConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "%s*"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( 
fieldName, criterion, isTimeseries, queryName, aspectRetriever, "%s*"); } @@ -695,11 +661,7 @@ private static QueryBuilder buildEndsWithConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s"); } @@ -709,19 +671,8 @@ private static QueryBuilder buildEqualsConditionFromCriterion( final boolean isTimeseries, final Map> searchableFieldTypes, @Nonnull AspectRetriever aspectRetriever) { - /* - * If the newer 'values' field of Criterion.pdl is set, then we - * handle using the following code to allow multi-match. - */ - if (!criterion.getValues().isEmpty()) { - return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); - } - /* - * Otherwise, we are likely using the deprecated 'value' field. - * We handle using the legacy code path below. 
- */ - return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries, aspectRetriever); + return buildEqualsConditionFromCriterionWithValues( + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } /** @@ -795,12 +746,7 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( // Determine criterion value, range query only accepts single value so take first value in // values if multiple - String criterionValueString; - if (!criterion.getValues().isEmpty()) { - criterionValueString = criterion.getValues().get(0).trim(); - } else { - criterionValueString = criterion.getValue().trim(); - } + String criterionValueString = criterion.getValues().get(0).trim(); Object criterionValue; String documentFieldName; if (fieldTypes.contains(BOOLEAN_FIELD_TYPE)) { @@ -829,48 +775,6 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( } } - /** - * Builds an instance of {@link QueryBuilder} representing an EQUALS condition which was created - * using the deprecated 'value' field of Criterion.pdl model. - * - *

Previously, we supported comma-separate values inside of a single string field, thus we have - * to account for splitting and matching against each value below. - * - *

For all new code, we should be using the new 'values' field for performing multi-match. This - * is simply retained for backwards compatibility of the search API. - */ - @Deprecated - private static QueryBuilder buildEqualsFromCriterionWithValue( - @Nonnull final String fieldName, - @Nonnull final Criterion criterion, - final boolean isTimeseries, - @Nonnull AspectRetriever aspectRetriever) { - // If the value is an URN style value, then we do not attempt to split it by comma (for obvious - // reasons) - if (isUrn(criterion.getValue())) { - return QueryBuilders.matchQuery( - toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), - criterion.getValue().trim()) - .queryName(fieldName) - .analyzer(KEYWORD_ANALYZER); - } - final BoolQueryBuilder filters = new BoolQueryBuilder(); - // Cannot assume the existence of a .keyword or other subfield (unless contains `.`) - // Cannot assume the type of the underlying field or subfield thus KEYWORD_ANALYZER is forced - List fields = - criterion.getField().contains(".") - ? 
List.of(criterion.getField()) - : List.of(criterion.getField(), criterion.getField() + ".*"); - Arrays.stream(criterion.getValue().trim().split("\\s*,\\s*")) - .forEach( - elem -> - filters.should( - QueryBuilders.multiMatchQuery(elem, fields.toArray(new String[0])) - .queryName(fieldName) - .analyzer(KEYWORD_ANALYZER))); - return filters; - } - @Nonnull public static BoolQueryBuilder applyDefaultSearchFilters( @Nonnull OperationContext opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java index add2b1526ab67..4f71a87ca16a8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; @@ -42,7 +43,7 @@ private SearchUtils() {} * @return a request map */ @Nonnull - public static Map getRequestMap(@Nullable Filter requestParams) { + public static Map> getRequestMap(@Nullable Filter requestParams) { if (requestParams == null) { return Collections.emptyMap(); } @@ -67,7 +68,7 @@ public static Map getRequestMap(@Nullable Filter requestParams) }); return criterionArray.stream() - .collect(Collectors.toMap(Criterion::getField, Criterion::getValue)); + .collect(Collectors.toMap(Criterion::getField, Criterion::getValues)); } public static boolean isUrn(@Nonnull String value) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java new file mode 100644 index 0000000000000..7549aea2007da --- /dev/null +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateGraphIndicesService.java @@ -0,0 
+1,452 @@ +package com.linkedin.metadata.service; + +import static com.linkedin.metadata.Constants.FORCE_INDEXING_KEY; +import static com.linkedin.metadata.Constants.STATUS_ASPECT_NAME; +import static com.linkedin.metadata.search.utils.QueryUtils.createRelationshipFilter; +import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.InputField; +import com.linkedin.common.InputFields; +import com.linkedin.common.Status; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.datajob.DataJobInputOutput; +import com.linkedin.dataset.FineGrainedLineage; +import com.linkedin.dataset.FineGrainedLineageArray; +import com.linkedin.dataset.UpstreamLineage; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.Constants; +import com.linkedin.metadata.aspect.batch.MCLItem; +import com.linkedin.metadata.aspect.models.graph.Edge; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.SearchIndicesService; +import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; +import com.linkedin.metadata.graph.GraphIndexUtils; +import com.linkedin.metadata.graph.GraphService; +import com.linkedin.metadata.graph.dgraph.DgraphGraphService; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.EntitySpec; +import com.linkedin.metadata.models.RelationshipFieldSpec; +import com.linkedin.metadata.models.extractor.FieldExtractor; +import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; +import com.linkedin.metadata.query.filter.Filter; +import com.linkedin.metadata.query.filter.RelationshipDirection; +import com.linkedin.metadata.utils.SchemaFieldUtils; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; 
+import io.datahubproject.metadata.context.OperationContext; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; +import lombok.Getter; +import lombok.Setter; +import lombok.extern.slf4j.Slf4j; + +@Slf4j +public class UpdateGraphIndicesService implements SearchIndicesService { + private static final String DOWNSTREAM_OF = "DownstreamOf"; + + public static UpdateGraphIndicesService withService(GraphService graphService) { + return new UpdateGraphIndicesService(graphService); + } + + private final GraphService graphService; + + @Getter private final boolean graphStatusEnabled; + + @Getter @Setter @VisibleForTesting private boolean graphDiffMode; + + private static final Set UPDATE_CHANGE_TYPES = + ImmutableSet.of( + ChangeType.CREATE, + ChangeType.CREATE_ENTITY, + ChangeType.UPSERT, + ChangeType.RESTATE, + ChangeType.PATCH); + + public UpdateGraphIndicesService(GraphService graphService) { + this(graphService, true, true); + } + + public UpdateGraphIndicesService( + GraphService graphService, boolean graphDiffMode, boolean graphStatusEnabled) { + this.graphService = graphService; + this.graphDiffMode = graphDiffMode; + this.graphStatusEnabled = graphStatusEnabled; + } + + @Override + public void handleChangeEvent( + @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { + try { + MCLItemImpl mclItem = + MCLItemImpl.builder().build(event, opContext.getAspectRetrieverOpt().get()); + + if (UPDATE_CHANGE_TYPES.contains(event.getChangeType())) { + handleUpdateChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } else if (event.getChangeType() == ChangeType.DELETE) 
{ + handleDeleteChangeEvent(opContext, mclItem); + + if (graphStatusEnabled && mclItem.getAspectName().equals(STATUS_ASPECT_NAME)) { + handleStatusUpdateChangeEvent(opContext, mclItem); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private void handleStatusUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem item) { + final Boolean removed; + if (ChangeType.DELETE.equals(item.getChangeType())) { + removed = false; + } else if (ChangeType.RESTATE.equals(item.getChangeType()) + || item.getPreviousRecordTemplate() == null + || !item.getPreviousAspect(Status.class).equals(item.getAspect(Status.class))) { + removed = item.getAspect(Status.class).isRemoved(); + } else { + removed = null; + } + + if (removed != null) { + graphService.setEdgeStatus(item.getUrn(), removed, EdgeUrnType.values()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} events that represent changes to + * the Metadata Graph. + * + *

In particular, it handles updating the Search, Graph, Timeseries, and System Metadata stores + * in response to a given change type to reflect the changes present in the new aspect. + * + * @param event the change event to be processed. + */ + private void handleUpdateChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) throws IOException { + + final AspectSpec aspectSpec = event.getAspectSpec(); + final Urn urn = event.getUrn(); + + RecordTemplate aspect = event.getRecordTemplate(); + RecordTemplate previousAspect = event.getPreviousRecordTemplate(); + + // For all aspects, attempt to update Graph + SystemMetadata systemMetadata = event.getSystemMetadata(); + if (graphDiffMode + && !(graphService instanceof DgraphGraphService) + && (systemMetadata == null + || systemMetadata.getProperties() == null + || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { + updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); + } else { + updateGraphService(opContext, urn, aspectSpec, aspect, event.getMetadataChangeLog()); + } + } + + /** + * This very important method processes {@link MetadataChangeLog} deletion events to cleanup the + * Metadata Graph when an aspect or entity is removed. + * + *

In particular, it updates the Graph store to reflect the deletion of a particular non-timeseries + * aspect; timeseries aspects are left untouched by this handler. + * + *

Note that if an entity's key aspect is deleted, the entire entity will be purged from + * search, graph, timeseries, etc. + * + * @param event the change event to be processed. + */ + private void handleDeleteChangeEvent( + @Nonnull final OperationContext opContext, @Nonnull final MCLItem event) { + + final EntitySpec entitySpec = event.getEntitySpec(); + final Urn urn = event.getUrn(); + + AspectSpec aspectSpec = entitySpec.getAspectSpec(event.getAspectName()); + if (aspectSpec == null) { + throw new RuntimeException( + String.format( + "Failed to retrieve Aspect Spec for entity with name %s, aspect with name %s. Cannot update indices for MCL.", + urn.getEntityType(), event.getAspectName())); + } + + RecordTemplate aspect = event.getRecordTemplate(); + Boolean isDeletingKey = event.getAspectName().equals(entitySpec.getKeyAspectName()); + + if (!aspectSpec.isTimeseries()) { + deleteGraphData( + opContext, urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); + } + } + + // TODO: remove this method once we implement sourceOverride when creating graph edges + private void updateFineGrainedEdgesAndRelationships( + Urn entity, + FineGrainedLineageArray fineGrainedLineageArray, + List edgesToAdd, + HashMap> urnToRelationshipTypesBeingAdded) { + if (fineGrainedLineageArray != null) { + for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { + if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { + break; + } + // Fine grained lineage array is present either on datajob (datajob input/output) or dataset + // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the + // viaEntity in scenario 2 + Urn viaEntity = + entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); + // for every downstream, create an edge with each of the upstreams + for (Urn downstream : fineGrainedLineage.getDownstreams()) { + for (Urn upstream : fineGrainedLineage.getUpstreams()) { + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + downstream, + upstream, + DOWNSTREAM_OF, + null, + null, + null, + null, + null, + entity, + viaEntity)); + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); + } + } + } + } + } + + // TODO: remove this method once we implement sourceOverride and update inputFields aspect + private void updateInputFieldEdgesAndRelationships( + @Nonnull final Urn urn, + @Nonnull final InputFields inputFields, + @Nonnull final List edgesToAdd, + @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { + if (inputFields.hasFields()) { + for (final InputField field : inputFields.getFields()) { + if (field.hasSchemaFieldUrn() + && field.hasSchemaField() + && field.getSchemaField().hasFieldPath()) { + final Urn sourceFieldUrn = + SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); + // TODO: add edges uniformly across aspects + edgesToAdd.add( + new Edge( + sourceFieldUrn, + field.getSchemaFieldUrn(), + DOWNSTREAM_OF, + null, + null, + null, + null, + null)); + final Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); + relationshipTypes.add(DOWNSTREAM_OF); + urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); + } + } + } + } + + private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final MetadataChangeLog event, + final boolean isNewAspectVersion) { + final List edgesToAdd = new 
ArrayList<>(); + final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); + + // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and + // inputFields + // since @Relationship only links between the parent entity urn and something else. + if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { + UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + upstreamLineage.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { + final InputFields inputFields = new InputFields(aspect.data()); + updateInputFieldEdgesAndRelationships( + urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); + } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { + DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); + updateFineGrainedEdgesAndRelationships( + urn, + dataJobInputOutput.getFineGrainedLineages(), + edgesToAdd, + urnToRelationshipTypesBeingAdded); + } + + Map> extractedFields = + FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); + + for (Map.Entry> entry : extractedFields.entrySet()) { + Set relationshipTypes = + urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); + relationshipTypes.add(entry.getKey().getRelationshipName()); + urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); + final List newEdges = + GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); + edgesToAdd.addAll(newEdges); + } + return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); + } + + /** Process snapshot and update graph index */ + private void updateGraphService( + @Nonnull final OperationContext opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull 
final MetadataChangeLog event) { + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final List edgesToAdd = edgeAndRelationTypes.getFirst(); + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + + log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); + if (!urnToRelationshipTypesBeingAdded.isEmpty()) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + newRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + edgesToAdd.forEach(graphService::addEdge); + } + } + + private void updateGraphServiceDiff( + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nullable final RecordTemplate oldAspect, + @Nonnull final RecordTemplate newAspect, + @Nonnull final MetadataChangeLog event) { + Pair, HashMap>> oldEdgeAndRelationTypes = null; + if (oldAspect != null) { + oldEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); + } + + final List oldEdges = + oldEdgeAndRelationTypes != null + ? 
oldEdgeAndRelationTypes.getFirst() + : Collections.emptyList(); + final Set oldEdgeSet = new HashSet<>(oldEdges); + + Pair, HashMap>> newEdgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); + + final List newEdges = newEdgeAndRelationTypes.getFirst(); + final Set newEdgeSet = new HashSet<>(newEdges); + + // Edges to add + final List additiveDifference = + newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to remove + final List subtractiveDifference = + oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); + + // Edges to update + final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); + + // Remove any old edges that no longer exist first + if (subtractiveDifference.size() > 0) { + log.debug("Removing edges: {}", subtractiveDifference); + subtractiveDifference.forEach(graphService::removeEdge); + } + + // Then add new edges + if (additiveDifference.size() > 0) { + log.debug("Adding edges: {}", additiveDifference); + additiveDifference.forEach(graphService::addEdge); + } + + // Then update existing edges + if (mergedEdges.size() > 0) { + log.debug("Updating edges: {}", mergedEdges); + mergedEdges.forEach(graphService::upsertEdge); + } + } + + private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { + final Map oldEdgesMap = + oldEdgeSet.stream() + .map(edge -> Pair.of(edge.hashCode(), edge)) + .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); + + final List mergedEdges = new ArrayList<>(); + if (!oldEdgesMap.isEmpty()) { + for (Edge newEdge : newEdgeSet) { + if (oldEdgesMap.containsKey(newEdge.hashCode())) { + final Edge oldEdge = oldEdgesMap.get(newEdge.hashCode()); + final Edge mergedEdge = GraphIndexUtils.mergeEdges(oldEdge, newEdge); + mergedEdges.add(mergedEdge); + } + } + } + + return mergedEdges; + } + + private void deleteGraphData( + @Nonnull final OperationContext 
opContext, + @Nonnull final Urn urn, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate aspect, + @Nonnull final Boolean isKeyAspect, + @Nonnull final MetadataChangeLog event) { + if (isKeyAspect) { + graphService.removeNode(opContext, urn); + return; + } + + Pair, HashMap>> edgeAndRelationTypes = + getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); + + final HashMap> urnToRelationshipTypesBeingAdded = + edgeAndRelationTypes.getSecond(); + if (urnToRelationshipTypesBeingAdded.size() > 0) { + for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { + graphService.removeEdgesFromNode( + opContext, + entry.getKey(), + new ArrayList<>(entry.getValue()), + createRelationshipFilter( + new Filter().setOr(new ConjunctiveCriterionArray()), + RelationshipDirection.OUTGOING)); + } + } + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java index 2274b0a7c1cd8..3795fd19316b1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/service/UpdateIndicesService.java @@ -2,59 +2,37 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.search.transformer.SearchDocumentTransformer.withSystemCreated; -import static com.linkedin.metadata.search.utils.QueryUtils.*; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; -import com.linkedin.common.InputField; -import com.linkedin.common.InputFields; import com.linkedin.common.Status; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import 
com.linkedin.datajob.DataJobInputOutput; -import com.linkedin.dataset.FineGrainedLineage; -import com.linkedin.dataset.FineGrainedLineageArray; -import com.linkedin.dataset.UpstreamLineage; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.batch.AspectsBatch; import com.linkedin.metadata.aspect.batch.MCLItem; -import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.entity.SearchIndicesService; import com.linkedin.metadata.entity.ebean.batch.MCLItemImpl; -import com.linkedin.metadata.graph.GraphIndexUtils; -import com.linkedin.metadata.graph.GraphService; -import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; -import com.linkedin.metadata.models.RelationshipFieldSpec; -import com.linkedin.metadata.models.extractor.FieldExtractor; -import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Filter; -import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.systemmetadata.SystemMetadataService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.timeseries.transformer.TimeseriesAspectTransformer; -import com.linkedin.metadata.utils.SchemaFieldUtils; import com.linkedin.mxe.MetadataChangeLog; import com.linkedin.mxe.SystemMetadata; import com.linkedin.structured.StructuredPropertyDefinition; -import com.linkedin.util.Pair; import io.datahubproject.metadata.context.OperationContext; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URLEncoder; -import java.util.ArrayList; -import 
java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Objects; @@ -63,33 +41,25 @@ import java.util.stream.Collectors; import java.util.stream.Stream; import javax.annotation.Nonnull; -import javax.annotation.Nullable; +import lombok.Getter; import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; @Slf4j public class UpdateIndicesService implements SearchIndicesService { - private static final String DOWNSTREAM_OF = "DownstreamOf"; - - private final GraphService _graphService; - private final EntitySearchService _entitySearchService; - private final TimeseriesAspectService _timeseriesAspectService; - private final SystemMetadataService _systemMetadataService; - private final SearchDocumentTransformer _searchDocumentTransformer; - private final EntityIndexBuilders _entityIndexBuilders; - @Nonnull private final String idHashAlgo; - @Value("${featureFlags.graphServiceDiffModeEnabled:true}") - private boolean _graphDiffMode; + @VisibleForTesting @Getter private final UpdateGraphIndicesService updateGraphIndicesService; + private final EntitySearchService entitySearchService; + private final TimeseriesAspectService timeseriesAspectService; + private final SystemMetadataService systemMetadataService; + private final SearchDocumentTransformer searchDocumentTransformer; + private final EntityIndexBuilders entityIndexBuilders; + @Nonnull private final String idHashAlgo; - @Value("${featureFlags.searchServiceDiffModeEnabled:true}") - private boolean _searchDiffMode; + @Getter private final boolean searchDiffMode; - @Value("${structuredProperties.enabled}") - private boolean _structuredPropertiesHookEnabled; + @Getter private final boolean structuredPropertiesHookEnabled; - @Value("${structuredProperties.writeEnabled}") - private boolean _structuredPropertiesWriteEnabled; + @Getter private final boolean structuredPropertiesWriteEnabled; private static 
final Set UPDATE_CHANGE_TYPES = ImmutableSet.of( @@ -99,31 +69,48 @@ public class UpdateIndicesService implements SearchIndicesService { ChangeType.RESTATE, ChangeType.PATCH); - @VisibleForTesting - public void setGraphDiffMode(boolean graphDiffMode) { - _graphDiffMode = graphDiffMode; - } - - @VisibleForTesting - public void setSearchDiffMode(boolean searchDiffMode) { - _searchDiffMode = searchDiffMode; + public UpdateIndicesService( + UpdateGraphIndicesService updateGraphIndicesService, + EntitySearchService entitySearchService, + TimeseriesAspectService timeseriesAspectService, + SystemMetadataService systemMetadataService, + SearchDocumentTransformer searchDocumentTransformer, + EntityIndexBuilders entityIndexBuilders, + @Nonnull String idHashAlgo) { + this( + updateGraphIndicesService, + entitySearchService, + timeseriesAspectService, + systemMetadataService, + searchDocumentTransformer, + entityIndexBuilders, + idHashAlgo, + true, + true, + true); } public UpdateIndicesService( - GraphService graphService, + UpdateGraphIndicesService updateGraphIndicesService, EntitySearchService entitySearchService, TimeseriesAspectService timeseriesAspectService, SystemMetadataService systemMetadataService, SearchDocumentTransformer searchDocumentTransformer, EntityIndexBuilders entityIndexBuilders, - @Nonnull String idHashAlgo) { - _graphService = graphService; - _entitySearchService = entitySearchService; - _timeseriesAspectService = timeseriesAspectService; - _systemMetadataService = systemMetadataService; - _searchDocumentTransformer = searchDocumentTransformer; - _entityIndexBuilders = entityIndexBuilders; + @Nonnull String idHashAlgo, + boolean searchDiffMode, + boolean structuredPropertiesHookEnabled, + boolean structuredPropertiesWriteEnabled) { + this.updateGraphIndicesService = updateGraphIndicesService; + this.entitySearchService = entitySearchService; + this.timeseriesAspectService = timeseriesAspectService; + this.systemMetadataService = systemMetadataService; 
+ this.searchDocumentTransformer = searchDocumentTransformer; + this.entityIndexBuilders = entityIndexBuilders; this.idHashAlgo = idHashAlgo; + this.searchDiffMode = searchDiffMode; + this.structuredPropertiesHookEnabled = structuredPropertiesHookEnabled; + this.structuredPropertiesWriteEnabled = structuredPropertiesWriteEnabled; } @Override @@ -144,6 +131,9 @@ public void handleChangeEvent( } else if (hookEvent.getChangeType() == ChangeType.DELETE) { handleDeleteChangeEvent(opContext, mclItem); } + + // graph update + updateGraphIndicesService.handleChangeEvent(opContext, event); } } catch (IOException e) { throw new RuntimeException(e); @@ -191,18 +181,6 @@ private void handleUpdateChangeEvent( // Step 2. For all aspects, attempt to update Search updateSearchService(opContext, event); - - // Step 3. For all aspects, attempt to update Graph - SystemMetadata systemMetadata = event.getSystemMetadata(); - if (_graphDiffMode - && !(_graphService instanceof DgraphGraphService) - && (systemMetadata == null - || systemMetadata.getProperties() == null - || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { - updateGraphServiceDiff(urn, aspectSpec, previousAspect, aspect, event.getMetadataChangeLog()); - } else { - updateGraphService(urn, aspectSpec, aspect, event.getMetadataChangeLog()); - } } public void updateIndexMappings( @@ -210,9 +188,8 @@ public void updateIndexMappings( EntitySpec entitySpec, AspectSpec aspectSpec, RecordTemplate newValue, - RecordTemplate oldValue) - throws IOException { - if (_structuredPropertiesHookEnabled + RecordTemplate oldValue) { + if (structuredPropertiesHookEnabled && STRUCTURED_PROPERTY_ENTITY_NAME.equals(entitySpec.getName()) && STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME.equals(aspectSpec.getName())) { @@ -228,7 +205,7 @@ public void updateIndexMappings( newDefinition.getEntityTypes().removeAll(oldEntityTypes); if (newDefinition.getEntityTypes().size() > 0) { - _entityIndexBuilders + entityIndexBuilders 
.buildReindexConfigsWithNewStructProp(urn, newDefinition) .forEach( reindexState -> { @@ -237,7 +214,7 @@ public void updateIndexMappings( "Applying new structured property {} to index {}", newDefinition, reindexState.name()); - _entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); + entityIndexBuilders.getIndexBuilder().applyMappings(reindexState, false); } catch (IOException e) { throw new RuntimeException(e); } @@ -277,236 +254,10 @@ private void handleDeleteChangeEvent( if (!aspectSpec.isTimeseries()) { deleteSystemMetadata(urn, aspectSpec, isDeletingKey); - deleteGraphData(urn, aspectSpec, aspect, isDeletingKey, event.getMetadataChangeLog()); deleteSearchData(opContext, urn, entitySpec.getName(), aspectSpec, aspect, isDeletingKey); } } - // TODO: remove this method once we implement sourceOverride when creating graph edges - private void updateFineGrainedEdgesAndRelationships( - Urn entity, - FineGrainedLineageArray fineGrainedLineageArray, - List edgesToAdd, - HashMap> urnToRelationshipTypesBeingAdded) { - if (fineGrainedLineageArray != null) { - for (FineGrainedLineage fineGrainedLineage : fineGrainedLineageArray) { - if (!fineGrainedLineage.hasDownstreams() || !fineGrainedLineage.hasUpstreams()) { - break; - } - // Fine grained lineage array is present either on datajob (datajob input/output) or dataset - // We set the datajob as the viaEntity in scenario 1, and the query (if present) as the - // viaEntity in scenario 2 - Urn viaEntity = - entity.getEntityType().equals("dataJob") ? 
entity : fineGrainedLineage.getQuery(); - // for every downstream, create an edge with each of the upstreams - for (Urn downstream : fineGrainedLineage.getDownstreams()) { - for (Urn upstream : fineGrainedLineage.getUpstreams()) { - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - downstream, - upstream, - DOWNSTREAM_OF, - null, - null, - null, - null, - null, - entity, - viaEntity)); - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(downstream, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(downstream, relationshipTypes); - } - } - } - } - } - - // TODO: remove this method once we implement sourceOverride and update inputFields aspect - private void updateInputFieldEdgesAndRelationships( - @Nonnull final Urn urn, - @Nonnull final InputFields inputFields, - @Nonnull final List edgesToAdd, - @Nonnull final HashMap> urnToRelationshipTypesBeingAdded) { - if (inputFields.hasFields()) { - for (final InputField field : inputFields.getFields()) { - if (field.hasSchemaFieldUrn() - && field.hasSchemaField() - && field.getSchemaField().hasFieldPath()) { - final Urn sourceFieldUrn = - SchemaFieldUtils.generateSchemaFieldUrn(urn, field.getSchemaField().getFieldPath()); - // TODO: add edges uniformly across aspects - edgesToAdd.add( - new Edge( - sourceFieldUrn, - field.getSchemaFieldUrn(), - DOWNSTREAM_OF, - null, - null, - null, - null, - null)); - final Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(sourceFieldUrn, new HashSet<>()); - relationshipTypes.add(DOWNSTREAM_OF); - urnToRelationshipTypesBeingAdded.put(sourceFieldUrn, relationshipTypes); - } - } - } - } - - private Pair, HashMap>> getEdgesAndRelationshipTypesFromAspect( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event, - final boolean isNewAspectVersion) { - final List edgesToAdd = new 
ArrayList<>(); - final HashMap> urnToRelationshipTypesBeingAdded = new HashMap<>(); - - // we need to manually set schemaField <-> schemaField edges for fineGrainedLineage and - // inputFields - // since @Relationship only links between the parent entity urn and something else. - if (aspectSpec.getName().equals(Constants.UPSTREAM_LINEAGE_ASPECT_NAME)) { - UpstreamLineage upstreamLineage = new UpstreamLineage(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - upstreamLineage.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.INPUT_FIELDS_ASPECT_NAME)) { - final InputFields inputFields = new InputFields(aspect.data()); - updateInputFieldEdgesAndRelationships( - urn, inputFields, edgesToAdd, urnToRelationshipTypesBeingAdded); - } else if (aspectSpec.getName().equals(Constants.DATA_JOB_INPUT_OUTPUT_ASPECT_NAME)) { - DataJobInputOutput dataJobInputOutput = new DataJobInputOutput(aspect.data()); - updateFineGrainedEdgesAndRelationships( - urn, - dataJobInputOutput.getFineGrainedLineages(), - edgesToAdd, - urnToRelationshipTypesBeingAdded); - } - - Map> extractedFields = - FieldExtractor.extractFields(aspect, aspectSpec.getRelationshipFieldSpecs()); - - for (Map.Entry> entry : extractedFields.entrySet()) { - Set relationshipTypes = - urnToRelationshipTypesBeingAdded.getOrDefault(urn, new HashSet<>()); - relationshipTypes.add(entry.getKey().getRelationshipName()); - urnToRelationshipTypesBeingAdded.put(urn, relationshipTypes); - final List newEdges = - GraphIndexUtils.extractGraphEdges(entry, aspect, urn, event, isNewAspectVersion); - edgesToAdd.addAll(newEdges); - } - return Pair.of(edgesToAdd, urnToRelationshipTypesBeingAdded); - } - - /** Process snapshot and update graph index */ - private void updateGraphService( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final MetadataChangeLog event) { - Pair, 
HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final List edgesToAdd = edgeAndRelationTypes.getFirst(); - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - - log.debug("Here's the relationship types found {}", urnToRelationshipTypesBeingAdded); - if (!urnToRelationshipTypesBeingAdded.isEmpty()) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - newRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } - edgesToAdd.forEach(_graphService::addEdge); - } - } - - private void updateGraphServiceDiff( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nullable final RecordTemplate oldAspect, - @Nonnull final RecordTemplate newAspect, - @Nonnull final MetadataChangeLog event) { - Pair, HashMap>> oldEdgeAndRelationTypes = null; - if (oldAspect != null) { - oldEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, oldAspect, event, false); - } - - final List oldEdges = - oldEdgeAndRelationTypes != null - ? 
oldEdgeAndRelationTypes.getFirst() - : Collections.emptyList(); - final Set oldEdgeSet = new HashSet<>(oldEdges); - - Pair, HashMap>> newEdgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, newAspect, event, true); - - final List newEdges = newEdgeAndRelationTypes.getFirst(); - final Set newEdgeSet = new HashSet<>(newEdges); - - // Edges to add - final List additiveDifference = - newEdgeSet.stream().filter(edge -> !oldEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to remove - final List subtractiveDifference = - oldEdgeSet.stream().filter(edge -> !newEdgeSet.contains(edge)).collect(Collectors.toList()); - - // Edges to update - final List mergedEdges = getMergedEdges(oldEdgeSet, newEdgeSet); - - // Remove any old edges that no longer exist first - if (subtractiveDifference.size() > 0) { - log.debug("Removing edges: {}", subtractiveDifference); - subtractiveDifference.forEach(_graphService::removeEdge); - } - - // Then add new edges - if (additiveDifference.size() > 0) { - log.debug("Adding edges: {}", additiveDifference); - additiveDifference.forEach(_graphService::addEdge); - } - - // Then update existing edges - if (mergedEdges.size() > 0) { - log.debug("Updating edges: {}", mergedEdges); - mergedEdges.forEach(_graphService::upsertEdge); - } - } - - private static List getMergedEdges(final Set oldEdgeSet, final Set newEdgeSet) { - final Map oldEdgesMap = - oldEdgeSet.stream() - .map(edge -> Pair.of(edge.hashCode(), edge)) - .collect(Collectors.toMap(Pair::getFirst, Pair::getSecond)); - - final List mergedEdges = new ArrayList<>(); - if (!oldEdgesMap.isEmpty()) { - for (com.linkedin.metadata.aspect.models.graph.Edge newEdge : newEdgeSet) { - if (oldEdgesMap.containsKey(newEdge.hashCode())) { - final com.linkedin.metadata.aspect.models.graph.Edge oldEdge = - oldEdgesMap.get(newEdge.hashCode()); - final com.linkedin.metadata.aspect.models.graph.Edge mergedEdge = - GraphIndexUtils.mergeEdges(oldEdge, newEdge); - 
mergedEdges.add(mergedEdge); - } - } - } - - return mergedEdges; - } - /** Process snapshot and update search index */ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem event) { Urn urn = event.getUrn(); @@ -520,7 +271,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev Optional previousSearchDocument = Optional.empty(); try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, false) .map( objectNode -> @@ -540,16 +291,16 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev return; } - final String docId = _entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); + final String docId = entityIndexBuilders.getIndexConvention().getEntityDocumentId(urn); - if (_searchDiffMode + if (searchDiffMode && (systemMetadata == null || systemMetadata.getProperties() == null || !Boolean.parseBoolean(systemMetadata.getProperties().get(FORCE_INDEXING_KEY)))) { if (previousAspect != null) { try { previousSearchDocument = - _searchDocumentTransformer.transformAspect( + searchDocumentTransformer.transformAspect( opContext, urn, previousAspect, aspectSpec, false); } catch (Exception e) { log.error( @@ -572,7 +323,7 @@ private void updateSearchService(@Nonnull OperationContext opContext, MCLItem ev searchDocument.get(), previousSearchDocument.orElse(null)) .toString(); - _entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); + entitySearchService.upsertDocument(opContext, entityName, finalDocument, docId); } /** Process snapshot and update time-series index */ @@ -597,18 +348,18 @@ private void updateTimeseriesFields( .entrySet() .forEach( document -> { - _timeseriesAspectService.upsertDocument( + timeseriesAspectService.upsertDocument( opContext, entityType, aspectName, document.getKey(), document.getValue()); }); } private void updateSystemMetadata( SystemMetadata systemMetadata, 
Urn urn, AspectSpec aspectSpec, RecordTemplate aspect) { - _systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); + systemMetadataService.insert(systemMetadata, urn.toString(), aspectSpec.getName()); // If processing status aspect update all aspects for this urn to removed if (aspectSpec.getName().equals(Constants.STATUS_ASPECT_NAME)) { - _systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); + systemMetadataService.setDocStatus(urn.toString(), ((Status) aspect).isRemoved()); } } @@ -616,41 +367,13 @@ private void deleteSystemMetadata(Urn urn, AspectSpec aspectSpec, Boolean isKeyA if (isKeyAspect) { // Delete all aspects log.debug(String.format("Deleting all system metadata for urn: %s", urn)); - _systemMetadataService.deleteUrn(urn.toString()); + systemMetadataService.deleteUrn(urn.toString()); } else { // Delete all aspects from system metadata service log.debug( String.format( "Deleting system metadata for urn: %s, aspect: %s", urn, aspectSpec.getName())); - _systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); - } - } - - private void deleteGraphData( - @Nonnull final Urn urn, - @Nonnull final AspectSpec aspectSpec, - @Nonnull final RecordTemplate aspect, - @Nonnull final Boolean isKeyAspect, - @Nonnull final MetadataChangeLog event) { - if (isKeyAspect) { - _graphService.removeNode(urn); - return; - } - - Pair, HashMap>> edgeAndRelationTypes = - getEdgesAndRelationshipTypesFromAspect(urn, aspectSpec, aspect, event, true); - - final HashMap> urnToRelationshipTypesBeingAdded = - edgeAndRelationTypes.getSecond(); - if (urnToRelationshipTypesBeingAdded.size() > 0) { - for (Map.Entry> entry : urnToRelationshipTypesBeingAdded.entrySet()) { - _graphService.removeEdgesFromNode( - entry.getKey(), - new ArrayList<>(entry.getValue()), - createRelationshipFilter( - new Filter().setOr(new ConjunctiveCriterionArray()), - RelationshipDirection.OUTGOING)); - } + 
systemMetadataService.deleteAspect(urn.toString(), aspectSpec.getName()); } } @@ -670,14 +393,14 @@ private void deleteSearchData( } if (isKeyAspect) { - _entitySearchService.deleteDocument(opContext, entityName, docId); + entitySearchService.deleteDocument(opContext, entityName, docId); return; } Optional searchDocument; try { searchDocument = - _searchDocumentTransformer + searchDocumentTransformer .transformAspect(opContext, urn, aspect, aspectSpec, true) .map(Objects::toString); // TODO } catch (Exception e) { @@ -690,18 +413,6 @@ private void deleteSearchData( return; } - _entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); - } - - private EntitySpec getEventEntitySpec( - @Nonnull OperationContext opContext, @Nonnull final MetadataChangeLog event) { - try { - return opContext.getEntityRegistry().getEntitySpec(event.getEntityType()); - } catch (IllegalArgumentException e) { - throw new RuntimeException( - String.format( - "Failed to retrieve Entity Spec for entity with name %s. 
Cannot update indices for MCL.", - event.getEntityType())); - } + entitySearchService.upsertDocument(opContext, entityName, searchDocument.get(), docId); } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java index 41addbe197f27..134c65d2b5fae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java @@ -4,6 +4,7 @@ import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -20,7 +21,6 @@ import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -170,12 +170,11 @@ private Filter getFilter() { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion propertyExistsCriterion = new Criterion(); // Cannot rely on automatic field name since the definition is deleted - propertyExistsCriterion.setField( - STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX - + 
StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); - propertyExistsCriterion.setCondition(Condition.EXISTS); + final Criterion propertyExistsCriterion = + buildExistsCriterion( + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); andCriterion.add(propertyExistsCriterion); conjunction.setAnd(andCriterion); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index cb364f41aa218..67518121edae4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.timeseries.elastic; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; @@ -339,20 +340,21 @@ public List getAspectValues( filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); if (startTimeMillis != null) { Criterion startTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.GREATER_THAN_OR_EQUAL_TO, + startTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - 
.setValue(endTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.LESS_THAN_OR_EQUAL_TO, + endTimeMillis.toString()); + filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); @@ -575,20 +577,21 @@ public TimeseriesScrollResult scrollAspects( if (startTimeMillis != null) { Criterion startTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.GREATER_THAN_OR_EQUAL_TO, + startTimeMillis.toString()); + filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(endTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.LESS_THAN_OR_EQUAL_TO, + endTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java index abeefae3cf39f..54f97f45219ac 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; +import static 
com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.codahale.metrics.Timer; import com.fasterxml.jackson.core.JsonProcessingException; @@ -97,26 +98,19 @@ public static UsageQueryResult query( // 1. Populate the filter. This is common for all queries. Filter filter = new Filter(); ArrayList criteria = new ArrayList<>(); - Criterion hasUrnCriterion = - new Criterion() - .setField("urn") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(resource)); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, resource); + criteria.add(hasUrnCriterion); if (startTime != null) { Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValues(new StringArray(startTime.toString())); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + criteria.add(startTimeCriterion); } if (endTime != null) { Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValues(new StringArray(endTime.toString())); + buildCriterion(ES_FIELD_TIMESTAMP, Condition.LESS_THAN_OR_EQUAL_TO, endTime.toString()); criteria.add(endTimeCriterion); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index fe3608a2cf71d..d585ff1ce8383 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -37,6 +37,7 @@ import io.datahubproject.metadata.context.OperationContext; import io.datahubproject.test.metadata.context.TestOperationContexts; import java.sql.Timestamp; +import java.util.List; import java.util.Map; import org.mockito.Mockito; import org.testng.annotations.Test; @@ -79,14 +80,15 @@ public void 
testDeleteUniqueRefGeneratesValidMCP() { Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", container.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", container.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final EntityResponse entityResponse = new EntityResponse(); @@ -195,14 +197,15 @@ public void testDeleteSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = @@ -249,14 +252,15 @@ public void testDeleteNoSearchReferences() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final 
DeleteReferencesResponse response = @@ -308,14 +312,15 @@ public void testDeleteSearchReferencesDryRun() { new RelatedEntitiesResult(0, 0, 0, ImmutableList.of()); Mockito.when( _graphService.findRelatedEntities( - null, - newFilter("urn", form.toString()), - null, - EMPTY_FILTER, - ImmutableList.of(), - newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING), - 0, - 10000)) + any(OperationContext.class), + nullable(List.class), + eq(newFilter("urn", form.toString())), + nullable(List.class), + eq(EMPTY_FILTER), + eq(ImmutableList.of()), + eq(newRelationshipFilter(EMPTY_FILTER, RelationshipDirection.INCOMING)), + eq(0), + eq((10000)))) .thenReturn(mockRelatedEntities); final DeleteReferencesResponse response = diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java index 5d9a5079f2a3b..64ab95b5c6843 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBase.java @@ -11,16 +11,16 @@ import com.linkedin.common.urn.DataJobUrn; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntity; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.dgraph.DgraphGraphService; import com.linkedin.metadata.graph.neo4j.Neo4jGraphService; -import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.query.filter.RelationshipFilter; +import io.datahubproject.metadata.context.OperationContext; +import 
io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URISyntaxException; import java.time.Duration; import java.util.ArrayList; @@ -47,7 +47,6 @@ import javax.annotation.Nullable; import org.springframework.test.context.testng.AbstractTestNGSpringContextTests; import org.testng.Assert; -import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; @@ -273,20 +272,16 @@ public int compare(RelatedEntity left, RelatedEntity right) { /** Any source and destination type value. */ protected static @Nullable List anyType = null; - protected final GraphQueryConfiguration _graphQueryConfiguration = getGraphQueryConfiguration(); + protected static final GraphQueryConfiguration _graphQueryConfiguration = + getGraphQueryConfiguration(); + protected static final OperationContext operationContext = + TestOperationContexts.systemContextNoSearchAuthorization(); /** Timeout used to test concurrent ops in doTestConcurrentOp. 
*/ protected Duration getTestConcurrentOpTimeout() { return Duration.ofMinutes(1); } - @BeforeMethod - public void disableAssert() { - PathSpecBasedSchemaAnnotationVisitor.class - .getClassLoader() - .setClassAssertionStatus(PathSpecBasedSchemaAnnotationVisitor.class.getName(), false); - } - @Test public void testStaticUrns() { assertNotNull(dataset1Urn); @@ -502,6 +497,7 @@ public void testAddEdge( RelatedEntitiesResult relatedOutgoing = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -514,6 +510,7 @@ public void testAddEdge( RelatedEntitiesResult relatedIncoming = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -531,6 +528,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -549,6 +547,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwoVia, downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -574,13 +573,13 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldOne)); EntityLineageResult viaNodeResult = service.getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path enabled assertEquals(viaNodeResult.getRelationships().size(), 2); // First one is via node @@ -589,13 +588,13 @@ public void testPopulatedGraphService() throws Exception { EntityLineageResult viaNodeResultNoMulti = getGraphService(false) .getLineage( + operationContext, schemaFieldUrnOne, LineageDirection.UPSTREAM, new GraphFilters(List.of("schemaField")), 0, 1000, - 100, - null); + 100); // Multi-path disabled, still has two because via flow creates both edges in response 
assertEquals(viaNodeResultNoMulti.getRelationships().size(), 2); @@ -612,12 +611,12 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { GraphService service = getLineagePopulatedGraphService(); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); Map relationships = @@ -630,7 +629,8 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getType(), consumes); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -641,11 +641,13 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getType(), produces); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); - upstreamLineage = 
service.getLineage(dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); + upstreamLineage = + service.getLineage(operationContext, dataJobOneUrn, LineageDirection.UPSTREAM, 0, 1000, 1); assertEquals(upstreamLineage.getTotal().intValue(), 2); assertEquals(upstreamLineage.getRelationships().size(), 2); relationships = @@ -656,7 +658,9 @@ public void testPopulatedGraphServiceGetLineage() throws Exception { assertTrue(relationships.containsKey(dataset2Urn)); assertEquals(relationships.get(dataset2Urn).getType(), consumes); - downstreamLineage = service.getLineage(dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); + downstreamLineage = + service.getLineage( + operationContext, dataJobOneUrn, LineageDirection.DOWNSTREAM, 0, 1000, 1); assertEquals(downstreamLineage.getTotal().intValue(), 3); assertEquals(downstreamLineage.getRelationships().size(), 3); relationships = @@ -834,6 +838,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, sourceEntityFilter, anyType, @@ -1118,6 +1123,7 @@ private void doTestFindRelatedEntities( RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1139,6 +1145,7 @@ private void doTestFindRelatedEntitiesEntityType( @Nonnull RelatedEntity... 
expectedEntities) { RelatedEntitiesResult actualEntities = service.findRelatedEntities( + operationContext, sourceType, EMPTY_FILTER, destinationType, @@ -1244,6 +1251,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1263,6 +1271,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1289,6 +1298,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1301,6 +1311,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1325,6 +1336,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1340,6 +1352,7 @@ public void testFindRelatedEntitiesNoRelationshipTypes() throws Exception { // did not get any related urns? 
RelatedEntitiesResult relatedEntitiesAll = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1358,6 +1371,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1371,6 +1385,7 @@ public void testFindRelatedEntitiesAllFilters() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), newFilter("urn", dataset1UrnString), ImmutableList.of(userType), @@ -1389,6 +1404,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1402,6 +1418,7 @@ public void testFindRelatedEntitiesMultipleEntityTypes() throws Exception { relatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType, userType), newFilter("urn", dataset1UrnString), ImmutableList.of(datasetType, userType), @@ -1421,6 +1438,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult allRelatedEntities = service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1436,6 +1454,7 @@ public void testFindRelatedEntitiesOffsetAndCount() throws Exception { idx -> individualRelatedEntities.addAll( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1540,6 +1559,7 @@ public void testRemoveEdgesFromNode( RelatedEntitiesResult actualOutgoingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), 
anyType, @@ -1550,6 +1570,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1566,6 +1587,7 @@ public void testRemoveEdgesFromNode( // we expect these do not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1576,6 +1598,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1585,11 +1608,13 @@ public void testRemoveEdgesFromNode( 0, 100); - service.removeEdgesFromNode(nodeToRemoveFrom, relationTypes, relationshipFilter); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, relationTypes, relationshipFilter); syncAfterWrite(); RelatedEntitiesResult actualOutgoingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1600,6 +1625,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult actualIncomingRelatedUrnsAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1616,6 +1642,7 @@ public void testRemoveEdgesFromNode( // assert these did not change RelatedEntitiesResult relatedEntitiesOfOtherOutgoingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1626,6 +1653,7 @@ public void testRemoveEdgesFromNode( 100); RelatedEntitiesResult relatedEntitiesOfOtherIncomingRelationTypesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", 
nodeToRemoveFrom.toString()), anyType, @@ -1650,6 +1678,7 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1661,12 +1690,15 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); - service.removeEdgesFromNode(nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), outgoingRelationships); + service.removeEdgesFromNode( + operationContext, nodeToRemoveFrom, Collections.emptyList(), incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1680,13 +1712,20 @@ public void testRemoveEdgesFromNodeNoRelationshipTypes() throws Exception { // does the test actually test something? is the Collections.emptyList() the only reason why we // did not see changes? 
service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemoveAll = service.findRelatedEntities( + operationContext, anyType, newFilter("urn", nodeToRemoveFrom.toString()), anyType, @@ -1706,6 +1745,7 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1718,13 +1758,20 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { // can be replaced with a single removeEdgesFromNode and undirectedRelationships once supported // by all implementations service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), outgoingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + outgoingRelationships); service.removeEdgesFromNode( - nodeToRemoveFrom, Arrays.asList(downstreamOf, hasOwner, knowsUser), incomingRelationships); + operationContext, + nodeToRemoveFrom, + Arrays.asList(downstreamOf, hasOwner, knowsUser), + incomingRelationships); syncAfterWrite(); RelatedEntitiesResult relatedOutgoingEntitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1740,13 +1787,14 @@ public void testRemoveEdgesFromUnknownNode() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - 
service.removeNode(dataset2Urn); + service.removeNode(operationContext, dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1768,6 +1816,7 @@ public void testRemoveUnknownNode() throws Exception { // populated graph asserted in testPopulatedGraphService RelatedEntitiesResult entitiesBeforeRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1777,11 +1826,12 @@ public void testRemoveUnknownNode() throws Exception { 0, 100); - service.removeNode(unknownUrn); + service.removeNode(operationContext, unknownUrn); syncAfterWrite(); RelatedEntitiesResult entitiesAfterRemove = service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, anyType, @@ -1806,6 +1856,7 @@ public void testClear() throws Exception { // again assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(datasetType), EMPTY_FILTER, anyType, @@ -1817,6 +1868,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, ImmutableList.of(userType), EMPTY_FILTER, anyType, @@ -1828,6 +1880,7 @@ public void testClear() throws Exception { Collections.emptyList()); assertEqualsAnyOrder( service.findRelatedEntities( + operationContext, anyType, EMPTY_FILTER, ImmutableList.of(userType), @@ -1891,6 +1944,7 @@ public void testConcurrentAddEdge() throws Exception { RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1937,6 +1991,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1956,6 +2011,7 @@ public void 
testConcurrentRemoveEdgesFromNode() throws Exception { edge -> () -> service.removeEdgesFromNode( + operationContext, edge.getSource(), Collections.singletonList(edge.getRelationshipType()), outgoingRelationships)); @@ -1965,6 +2021,7 @@ public void testConcurrentRemoveEdgesFromNode() throws Exception { // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -1998,6 +2055,7 @@ public void testConcurrentRemoveNodes() throws Exception { // assert the graph is there RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2013,13 +2071,14 @@ public void testConcurrentRemoveNodes() throws Exception { // remove all nodes concurrently // nodes will be removed multiple times Stream operations = - edges.stream().map(edge -> () -> service.removeNode(edge.getSource())); + edges.stream().map(edge -> () -> service.removeNode(operationContext, edge.getSource())); doTestConcurrentOp(operations); syncAfterWrite(); // assert the graph is gone RelatedEntitiesResult relatedEntitiesAfterDeletion = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2094,12 +2153,12 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath (!((service instanceof Neo4jGraphService) || (service instanceof DgraphGraphService))); EntityLineageResult upstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 0); assertEquals(upstreamLineage.getRelationships().size(), 0); EntityLineageResult downstreamLineage = - service.getLineage(dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + service.getLineage(operationContext, dataset1Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); 
assertEquals(downstreamLineage.getTotal().intValue(), 5); assertEquals(downstreamLineage.getRelationships().size(), 5); @@ -2124,7 +2183,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobTwoUrn)); assertEquals(relationships.get(dataJobTwoUrn).getDegree(), 1); - upstreamLineage = service.getLineage(dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); + upstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.UPSTREAM, 0, 1000, 2); assertEquals(upstreamLineage.getTotal().intValue(), 3); assertEquals(upstreamLineage.getRelationships().size(), 3); relationships = @@ -2137,7 +2197,8 @@ public void testPopulatedGraphServiceGetLineageMultihop(Boolean attemptMultiPath assertTrue(relationships.containsKey(dataJobOneUrn)); assertEquals(relationships.get(dataJobOneUrn).getDegree(), 1); - downstreamLineage = service.getLineage(dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); + downstreamLineage = + service.getLineage(operationContext, dataset3Urn, LineageDirection.DOWNSTREAM, 0, 1000, 2); assertEquals(downstreamLineage.getTotal().intValue(), 0); assertEquals(downstreamLineage.getRelationships().size(), 0); } @@ -2156,6 +2217,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { Set expectedRelatedEntities = convertEdgesToRelatedEntities(edges); RelatedEntitiesResult relatedEntities = service.findRelatedEntities( + operationContext, null, EMPTY_FILTER, null, @@ -2169,9 +2231,13 @@ public void testHighlyConnectedGraphWalk() throws Exception { expectedRelatedEntities); Urn root = dataset1Urn; + OperationContext limitedHopOpContext = + operationContext.withLineageFlags(f -> f.setEntitiesExploredPerHopLimit(5)); + EntityLineageResult lineageResult = getGraphService(false) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2183,8 +2249,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { 
.collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); // Unable to explore all paths because multi is disabled, but will be at least 5 since it will // explore 5 edges assertTrue( @@ -2201,6 +2266,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { EntityLineageResult lineageResultMulti = getGraphService(true) .getLineage( + limitedHopOpContext, root, LineageDirection.UPSTREAM, new GraphFilters( @@ -2212,8 +2278,7 @@ public void testHighlyConnectedGraphWalk() throws Exception { .collect(Collectors.toList())), 0, 1000, - 100, - new LineageFlags().setEntitiesExploredPerHopLimit(5)); + 100); assertTrue( lineageResultMulti.getRelationships().size() >= 5 diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java index e4cefaa1feaa1..a4a93b29f50c6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/GraphServiceTestBaseNoVia.java @@ -1,8 +1,10 @@ package com.linkedin.metadata.graph; import static com.linkedin.metadata.search.utils.QueryUtils.*; +import static org.mockito.Mockito.mock; import static org.testng.Assert.*; +import io.datahubproject.metadata.context.OperationContext; import java.util.Arrays; import java.util.Collections; import org.testng.annotations.DataProvider; @@ -220,6 +222,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allOutgoingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -243,6 +246,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allIncomingRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -269,6 +273,7 @@ public 
void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult allUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -281,6 +286,7 @@ public void testFindRelatedEntitiesRelationshipTypes() throws Exception { RelatedEntitiesResult someUnknownRelationshipTypeRelatedEntities = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -306,6 +312,7 @@ public void testPopulatedGraphService() throws Exception { RelatedEntitiesResult relatedOutgoingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -328,6 +335,7 @@ public void testPopulatedGraphService() throws Exception { downstreamOfSchemaFieldTwo)); RelatedEntitiesResult relatedIncomingEntitiesBeforeRemove = service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, @@ -360,13 +368,14 @@ public void testPopulatedGraphService() throws Exception { public void testRemoveNode() throws Exception { GraphService service = getPopulatedGraphService(); - service.removeNode(dataset2Urn); + service.removeNode(mock(OperationContext.class), dataset2Urn); syncAfterWrite(); // assert the modified graph // All downstreamOf, hasOwner, knowsUser relationships minus datasetTwo's, outgoing assertEqualsAnyOrder( service.findRelatedEntities( + mock(OperationContext.class), anyType, EMPTY_FILTER, anyType, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java similarity index 92% rename from metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java rename to metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java index 0bf7df1fc8e7c..b8e3a6e107128 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/graph/search/ESGraphQueryDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAOTest.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.graph.search; +package com.linkedin.metadata.graph.elastic; import com.google.common.collect.ImmutableList; import com.google.common.io.Resources; @@ -9,10 +9,11 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; -import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.models.registry.LineageRegistry; import com.linkedin.metadata.query.LineageFlags; import com.linkedin.metadata.query.filter.RelationshipDirection; +import io.datahubproject.metadata.context.OperationContext; +import io.datahubproject.test.metadata.context.TestOperationContexts; import java.net.URL; import java.nio.charset.StandardCharsets; import java.util.ArrayList; @@ -21,6 +22,7 @@ import java.util.Map; import org.opensearch.index.query.QueryBuilder; import org.testng.Assert; +import org.testng.annotations.BeforeTest; import org.testng.annotations.Test; public class ESGraphQueryDAOTest { @@ -34,8 +36,15 @@ public class ESGraphQueryDAOTest { private static final String TEST_QUERY_FILE_FULL_MULTIPLE_FILTERS = "elasticsearch/sample_filters/lineage_query_filters_full_multiple_filters.json"; + private OperationContext operationContext; + + @BeforeTest + public void init() { + operationContext = TestOperationContexts.systemContextNoSearchAuthorization(); + } + @Test - private static void testGetQueryForLineageFullArguments() throws Exception { + private void testGetQueryForLineageFullArguments() throws Exception { URL urlLimited = Resources.getResource(TEST_QUERY_FILE_LIMITED); String expectedQueryLimited = Resources.toString(urlLimited, StandardCharsets.UTF_8); @@ -108,21 +117,26 @@ private static void testGetQueryForLineageFullArguments() 
throws Exception { QueryBuilder fullBuilder = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityType, edgesPerEntityType, - graphFilters, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFilters); QueryBuilder fullBuilderEmptyFilters = graphQueryDAO.getLineageQuery( - urnsPerEntityType, edgesPerEntityType, GraphFilters.emptyGraphFilters, null); + operationContext, + urnsPerEntityType, + edgesPerEntityType, + GraphFilters.emptyGraphFilters); QueryBuilder fullBuilderMultipleFilters = graphQueryDAO.getLineageQuery( + operationContext.withLineageFlags( + f -> new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)), urnsPerEntityTypeMultiple, edgesPerEntityTypeMultiple, - graphFiltersMultiple, - new LineageFlags().setEndTimeMillis(endTime).setStartTimeMillis(startTime)); + graphFiltersMultiple); Assert.assertEquals(limitedBuilder.toString(), expectedQueryLimited); Assert.assertEquals(fullBuilder.toString(), expectedQueryFull); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java new file mode 100644 index 0000000000000..ac96257e8ec41 --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ESGraphWriteDAOTest.java @@ -0,0 +1,33 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +import com.linkedin.metadata.config.search.GraphQueryConfiguration; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConvention; +import 
com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import org.opensearch.index.query.QueryBuilders; +import org.opensearch.script.Script; +import org.testng.annotations.Test; + +public class ESGraphWriteDAOTest { + public static final IndexConvention TEST_INDEX_CONVENTION = IndexConventionImpl.noPrefix("md5"); + + @Test + public void testUpdateByQuery() { + ESBulkProcessor mockBulkProcess = mock(ESBulkProcessor.class); + GraphQueryConfiguration config = new GraphQueryConfiguration(); + config.setGraphStatusEnabled(true); + ESGraphWriteDAO test = new ESGraphWriteDAO(TEST_INDEX_CONVENTION, mockBulkProcess, 0, config); + + test.updateByQuery(new Script("test"), QueryBuilders.boolQuery()); + + verify(mockBulkProcess) + .updateByQuery( + eq(new Script("test")), eq(QueryBuilders.boolQuery()), eq("graph_service_v1")); + verifyNoMoreInteractions(mockBulkProcess); + } +} diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java new file mode 100644 index 0000000000000..1f53b9c4e999e --- /dev/null +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java @@ -0,0 +1,98 @@ +package com.linkedin.metadata.graph.elastic; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; +import com.linkedin.metadata.entity.TestEntityRegistry; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.models.registry.LineageRegistry; +import 
com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; +import com.linkedin.metadata.search.elasticsearch.update.ESBulkProcessor; +import com.linkedin.metadata.utils.elasticsearch.IndexConventionImpl; +import java.util.Set; +import org.mockito.ArgumentCaptor; +import org.opensearch.index.query.BoolQueryBuilder; +import org.opensearch.index.query.ExistsQueryBuilder; +import org.opensearch.index.query.QueryBuilder; +import org.opensearch.index.query.TermQueryBuilder; +import org.opensearch.script.Script; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.BeforeTest; +import org.testng.annotations.Test; + +public class ElasticSearchGraphServiceTest { + + private ElasticSearchGraphService test; + private ESBulkProcessor mockESBulkProcessor; + private ESGraphWriteDAO mockWriteDAO; + private ESGraphQueryDAO mockReadDAO; + + @BeforeTest + public void beforeTest() { + EntityRegistry entityRegistry = new TestEntityRegistry(); + mockESBulkProcessor = mock(ESBulkProcessor.class); + mockWriteDAO = mock(ESGraphWriteDAO.class); + mockReadDAO = mock(ESGraphQueryDAO.class); + test = + new ElasticSearchGraphService( + new LineageRegistry(entityRegistry), + mockESBulkProcessor, + IndexConventionImpl.noPrefix("md5"), + mockWriteDAO, + mockReadDAO, + mock(ESIndexBuilder.class), + "md5"); + } + + @BeforeMethod + public void beforeMethod() { + reset(mockESBulkProcessor, mockWriteDAO, mockReadDAO); + } + + @Test + public void testSetEdgeStatus() { + final Urn testUrn = UrnUtils.getUrn("urn:li:container:test"); + for (boolean removed : Set.of(true, false)) { + test.setEdgeStatus(testUrn, removed, EdgeUrnType.values()); + + ArgumentCaptor