Skip to content

Commit

Permalink
Merge branch 'datahub-project:master' into loading-logo-chaos
Browse files Browse the repository at this point in the history
  • Loading branch information
eburairu authored Mar 16, 2022
2 parents 860f3bd + 431ba4b commit d328a93
Show file tree
Hide file tree
Showing 287 changed files with 8,933 additions and 1,493 deletions.
7 changes: 6 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ HOSTED_DOCS_ONLY-->
## Introduction

DataHub is an open-source metadata platform for the modern data stack. Read about the architectures of different metadata systems and why DataHub excels [here](https://engineering.linkedin.com/blog/2020/datahub-popular-metadata-architectures-explained). Also read our
[LinkedIn Engineering blog post](https://engineering.linkedin.com/blog/2019/data-hub), check out our [Strata presentation](https://speakerdeck.com/shirshanka/the-evolution-of-metadata-linkedins-journey-strata-nyc-2019) and watch our [Crunch Conference Talk](https://www.youtube.com/watch?v=OB-O0Y6OYDE). You should also visit [DataHub Architecture](docs/architecture/architecture.md) to get a better understanding of how DataHub is implemented and [DataHub Onboarding Guide](docs/modeling/extending-the-metadata-model.md) to understand how to extend DataHub for your own use cases.
[LinkedIn Engineering blog post](https://engineering.linkedin.com/blog/2019/data-hub), check out our [Strata presentation](https://speakerdeck.com/shirshanka/the-evolution-of-metadata-linkedins-journey-strata-nyc-2019) and watch our [Crunch Conference Talk](https://www.youtube.com/watch?v=OB-O0Y6OYDE). You should also visit [DataHub Architecture](docs/architecture/architecture.md) to get a better understanding of how DataHub is implemented.

## Quickstart

Expand Down Expand Up @@ -99,6 +99,10 @@ Check out DataHub's [Features](docs/features.md) & [Roadmap](https://feature-req

We welcome contributions from the community. Please refer to our [Contributing Guidelines](docs/CONTRIBUTING.md) for more details. We also have a [contrib](contrib) directory for incubating experimental features.

### Extending

If you need to understand how to extend our model with custom types, please see [Extending the Metadata Model](docs/modeling/extending-the-metadata-model.md).

## Community

Join our [Slack workspace](https://slack.datahubproject.io) for discussions and important announcements. You can also find out more about our upcoming [town hall meetings](docs/townhalls.md) and view past recordings.
Expand All @@ -109,6 +113,7 @@ Here are the companies that have officially adopted DataHub. Please feel free to

- [Adevinta](https://www.adevinta.com/)
- [Banksalad](https://www.banksalad.com)
- [Cabify](https://cabify.tech/)
- [DefinedCrowd](http://www.definedcrowd.com)
- [DFDS](https://www.dfds.com/)
- [Expedia Group](http://expedia.com)
Expand Down
35 changes: 26 additions & 9 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ project.ext.externalDependency = [
'javaxInject' : 'javax.inject:javax.inject:1',
'jerseyCore': 'org.glassfish.jersey.core:jersey-client:2.25.1',
'jerseyGuava': 'org.glassfish.jersey.bundles.repackaged:jersey-guava:2.25.1',
'jettyJaas': 'org.eclipse.jetty:jetty-jaas:9.4.28.v20200408',
'jettyJaas': 'org.eclipse.jetty:jetty-jaas:9.4.32.v20200930',
'jgrapht': 'org.jgrapht:jgrapht-core:1.5.1',
'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1',
'junitJupiterApi': "org.junit.jupiter:junit-jupiter-api:$junitJupiterVersion",
'junitJupiterParams': "org.junit.jupiter:junit-jupiter-params:$junitJupiterVersion",
Expand All @@ -98,6 +99,7 @@ project.ext.externalDependency = [
'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.1',
'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:1.0.0',
'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:1.0.0',
'opentracingJdbc':'io.opentracing.contrib:opentracing-jdbc:0.2.15',
'parseqTest': 'com.linkedin.parseq:parseq:3.0.7:test',
'parquet': 'org.apache.parquet:parquet-avro:1.12.0',
'picocli': 'info.picocli:picocli:4.5.0',
Expand All @@ -110,11 +112,12 @@ project.ext.externalDependency = [
'pac4j': 'org.pac4j:pac4j-oidc:3.6.0',
'playPac4j': 'org.pac4j:play-pac4j_2.11:7.0.1',
'postgresql': 'org.postgresql:postgresql:42.3.3',
'protobuf': 'com.google.protobuf:protobuf-java:3.19.3',
'reflections': 'org.reflections:reflections:0.9.9',
'resilience4j': 'io.github.resilience4j:resilience4j-retry:1.7.1',
'rythmEngine': 'org.rythmengine:rythm-engine:1.3.0',
'servletApi': 'javax.servlet:javax.servlet-api:3.1.0',
'shiroCore': 'org.apache.shiro:shiro-core:1.7.1',
'shiroCore': 'org.apache.shiro:shiro-core:1.8.0',
'sparkSql' : 'org.apache.spark:spark-sql_2.11:2.4.8',
'sparkHive' : 'org.apache.spark:spark-hive_2.11:2.4.8',
'springBeans': 'org.springframework:spring-beans:5.2.3.RELEASE',
Expand All @@ -140,6 +143,7 @@ project.ext.externalDependency = [
'typesafeConfig':'com.typesafe:config:1.4.1',
'wiremock':'com.github.tomakehurst:wiremock:2.10.0',
'zookeeper': 'org.apache.zookeeper:zookeeper:3.4.14'

]

allprojects {
Expand Down Expand Up @@ -194,14 +198,27 @@ subprojects {
}
}

tasks.withType(JavaCompile).configureEach {
javaCompiler = javaToolchains.compilerFor {
languageVersion = JavaLanguageVersion.of(8)
if (project.name != 'datahub-protobuf') {
tasks.withType(JavaCompile).configureEach {
javaCompiler = javaToolchains.compilerFor {
languageVersion = JavaLanguageVersion.of(8)
}
}
}
tasks.withType(Test).configureEach {
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(8)
tasks.withType(Test).configureEach {
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(8)
}
}
} else {
tasks.withType(JavaCompile).configureEach {
javaCompiler = javaToolchains.compilerFor {
languageVersion = JavaLanguageVersion.of(11)
}
}
tasks.withType(Test).configureEach {
javaLauncher = javaToolchains.launcherFor {
languageVersion = JavaLanguageVersion.of(11)
}
}
}

Expand Down
24 changes: 19 additions & 5 deletions datahub-frontend/app/auth/sso/oidc/OidcCallbackLogic.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import play.mvc.Result;
import auth.sso.SsoManager;

import static com.linkedin.metadata.Constants.*;
import static play.mvc.Results.*;
import static auth.AuthUtils.*;

Expand Down Expand Up @@ -125,16 +126,14 @@ private Result handleOidcCallback(
if (oidcConfigs.isJitProvisioningEnabled()) {
log.debug("Just-in-time provisioning is enabled. Beginning provisioning process...");
CorpUserSnapshot extractedUser = extractUser(corpUserUrn, profile);
tryProvisionUser(extractedUser);
if (oidcConfigs.isExtractGroupsEnabled()) {
// Extract groups & provision them.
List<CorpGroupSnapshot> extractedGroups = extractGroups(profile);
tryProvisionGroups(extractedGroups);
if (extractedGroups.size() > 0) {
// Associate group with the user logging in.
extractedUser.getAspects().add(CorpUserAspect.create(createGroupMembership(extractedGroups)));
}
// Add users to groups on DataHub. Note that this clears existing group membership for a user if it already exists.
updateGroupMembership(corpUserUrn, createGroupMembership(extractedGroups));
}
tryProvisionUser(extractedUser);
} else if (oidcConfigs.isPreProvisioningRequired()) {
// We should only allow logins for user accounts that have been pre-provisioned
log.debug("Pre Provisioning is required. Beginning validation of extracted user...");
Expand Down Expand Up @@ -372,6 +371,21 @@ private void tryProvisionGroups(List<CorpGroupSnapshot> corpGroups) {
}
}

/**
 * Upserts the group membership aspect for the given user.
 *
 * <p>Builds a {@link MetadataChangeProposal} with change type UPSERT that targets the
 * group membership aspect of the corp user entity, then ingests it through the entity
 * client using the system authentication.
 *
 * @param urn urn of the user whose group membership is being written
 * @param groupMembership membership aspect that is serialized into the proposal
 * @throws RuntimeException wrapping the {@link RemoteInvocationException} raised when
 *     ingestion of the proposal fails
 */
private void updateGroupMembership(Urn urn, GroupMembership groupMembership) {
  log.debug(String.format("Updating group membership for user %s", urn));

  // Describe the single-aspect write: which entity, which aspect, and how to apply it.
  final MetadataChangeProposal membershipProposal = new MetadataChangeProposal();
  membershipProposal.setEntityUrn(urn);
  membershipProposal.setEntityType(CORP_USER_ENTITY_NAME);
  membershipProposal.setAspectName(GROUP_MEMBERSHIP_ASPECT_NAME);
  membershipProposal.setAspect(GenericAspectUtils.serializeAspect(groupMembership));
  membershipProposal.setChangeType(ChangeType.UPSERT);

  try {
    _entityClient.ingestProposal(membershipProposal, _systemAuthentication);
  } catch (RemoteInvocationException cause) {
    // Surface the failure as unchecked while keeping the original exception as the cause.
    final String failureMessage =
        String.format("Failed to update group membership for user with urn %s", urn);
    throw new RuntimeException(failureMessage, cause);
  }
}

private void verifyPreProvisionedUser(CorpuserUrn urn) {
// Validate that the user exists in the system (there is more than just a key aspect for them, as of today).
try {
Expand Down
2 changes: 1 addition & 1 deletion datahub-frontend/app/auth/sso/oidc/OidcConfigs.java
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class OidcConfigs extends SsoConfigs {
private static final String DEFAULT_OIDC_CLIENT_AUTHENTICATION_METHOD = "client_secret_basic";
private static final String DEFAULT_OIDC_JIT_PROVISIONING_ENABLED = "true";
private static final String DEFAULT_OIDC_PRE_PROVISIONING_REQUIRED = "false";
private static final String DEFAULT_OIDC_EXTRACT_GROUPS_ENABLED = "true";
private static final String DEFAULT_OIDC_EXTRACT_GROUPS_ENABLED = "false"; // False since extraction of groups can overwrite existing group membership.
private static final String DEFAULT_OIDC_GROUPS_CLAIM = "groups";

private String clientId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import com.linkedin.datahub.graphql.generated.AggregationMetadata;
import com.linkedin.datahub.graphql.generated.Aspect;
import com.linkedin.datahub.graphql.generated.Assertion;
import com.linkedin.datahub.graphql.generated.AutoCompleteResultForEntity;
import com.linkedin.datahub.graphql.generated.AutoCompleteResults;
import com.linkedin.datahub.graphql.generated.BrowseResults;
import com.linkedin.datahub.graphql.generated.Chart;
import com.linkedin.datahub.graphql.generated.ChartInfo;
Expand Down Expand Up @@ -514,6 +516,10 @@ private void configureContainerResolvers(final RuntimeWiring.Builder builder) {
.type("Container", typeWiring -> typeWiring
.dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient))
.dataFetcher("entities", new ContainerEntitiesResolver(entityClient))
.dataFetcher("domain", new LoadableTypeResolver<>(domainType, (env) -> {
final Container container = env.getSource();
return container.getDomain() != null ? container.getDomain().getUrn() : null;
}))
.dataFetcher("platform",
new LoadableTypeResolver<>(dataPlatformType,
(env) -> ((Container) env.getSource()).getPlatform().getUrn()))
Expand Down Expand Up @@ -730,7 +736,20 @@ private void configureGenericEntityResolvers(final RuntimeWiring.Builder builder
(env) -> ((ListDomainsResult) env.getSource()).getDomains().stream()
.map(Domain::getUrn)
.collect(Collectors.toList())))
)
.type("AutoCompleteResults", typeWiring -> typeWiring
.dataFetcher("entities",
new EntityTypeBatchResolver(
new ArrayList<>(entityTypes),
(env) -> ((AutoCompleteResults) env.getSource()).getEntities()))
)
.type("AutoCompleteResultForEntity", typeWiring -> typeWiring
.dataFetcher("entities",
new EntityTypeBatchResolver(
new ArrayList<>(entityTypes),
(env) -> ((AutoCompleteResultForEntity) env.getSource()).getEntities()))
);
;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> wauTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), twoMonthsDateRange, weeklyInterval,
Optional.empty(), ImmutableMap.of(), Optional.of("browserId"));
Optional.empty(), ImmutableMap.of(), Collections.emptyMap(), Optional.of("browserId"));
charts.add(TimeSeriesChart.builder()
.setTitle(wauTitle)
.setDateRange(twoMonthsDateRange)
Expand All @@ -90,7 +90,8 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<NamedLine> searchesTimeseries =
_analyticsService.getTimeseriesChart(_analyticsService.getUsageIndexName(), lastWeekDateRange, dailyInterval,
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty());
Optional.empty(), ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty());
charts.add(TimeSeriesChart.builder()
.setTitle(searchesTitle)
.setDateRange(lastWeekDateRange)
Expand All @@ -104,24 +105,26 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica

final List<Row> topSearchQueries =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Optional.empty(), 10,
AnalyticsUtil::buildCellWithSearchLandingPage);
"query.keyword", ImmutableMap.of("type", ImmutableList.of(searchEventType)), Collections.emptyMap(),
Optional.empty(), 10, AnalyticsUtil::buildCellWithSearchLandingPage);
charts.add(TableChart.builder().setTitle(topSearchTitle).setColumns(columns).setRows(topSearchQueries).build());

// Chart 4: Bar Graph Chart
final String sectionViewsTitle = "Section Views across Entity Types";
final List<NamedBar> sectionViewsPerEntityType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "section.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntitySectionViewEvent")), Collections.emptyMap(),
Optional.empty(), true);
charts.add(BarChart.builder().setTitle(sectionViewsTitle).setBars(sectionViewsPerEntityType).build());

// Chart 5: Bar Graph Chart
final String actionsByTypeTitle = "Actions by Entity Type";
final List<NamedBar> eventsByEventType =
_analyticsService.getBarChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
ImmutableList.of("entityType.keyword", "actionType.keyword"),
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Optional.empty(), true);
ImmutableMap.of("type", ImmutableList.of("EntityActionEvent")), Collections.emptyMap(), Optional.empty(),
true);
charts.add(BarChart.builder().setTitle(actionsByTypeTitle).setBars(eventsByEventType).build());

// Chart 6: Table Chart
Expand All @@ -131,7 +134,7 @@ private List<AnalyticsChart> getProductAnalyticsCharts(Authentication authentica
final List<Row> topViewedDatasets =
_analyticsService.getTopNTableChart(_analyticsService.getUsageIndexName(), Optional.of(lastWeekDateRange),
"entityUrn.keyword", ImmutableMap.of("type", ImmutableList.of("EntityViewEvent"), "entityType.keyword",
ImmutableList.of(EntityType.DATASET.name())), Optional.empty(), 10,
ImmutableList.of(EntityType.DATASET.name())), Collections.emptyMap(), Optional.empty(), 10,
AnalyticsUtil::buildCellWithEntityLandingPage);
AnalyticsUtil.hydrateDisplayNameForTable(_entityClient, topViewedDatasets, Constants.DATASET_ENTITY_NAME,
ImmutableSet.of(Constants.DATASET_KEY_ASPECT_NAME), AnalyticsUtil::getDatasetName, authentication);
Expand All @@ -145,7 +148,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 1: Entities per domain
final List<NamedBar> entitiesPerDomain =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("domains.keyword", "platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerDomain, Constants.DOMAIN_ENTITY_NAME,
ImmutableSet.of(Constants.DOMAIN_PROPERTIES_ASPECT_NAME), AnalyticsUtil::getDomainName, authentication);
AnalyticsUtil.hydrateDisplayNameForSegments(_entityClient, entitiesPerDomain, Constants.DATA_PLATFORM_ENTITY_NAME,
Expand All @@ -157,7 +161,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 2: Entities per platform
final List<NamedBar> entitiesPerPlatform =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("platform.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("platform.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerPlatform, Constants.DATA_PLATFORM_ENTITY_NAME,
ImmutableSet.of(Constants.DATA_PLATFORM_INFO_ASPECT_NAME), AnalyticsUtil::getPlatformName, authentication);
if (!entitiesPerPlatform.isEmpty()) {
Expand All @@ -167,7 +172,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 3: Entities per term
final List<NamedBar> entitiesPerTerm =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("glossaryTerms.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
AnalyticsUtil.hydrateDisplayNameForBars(_entityClient, entitiesPerTerm, Constants.GLOSSARY_TERM_ENTITY_NAME,
ImmutableSet.of(Constants.GLOSSARY_TERM_KEY_ASPECT_NAME), AnalyticsUtil::getTermName, authentication);
if (!entitiesPerTerm.isEmpty()) {
Expand All @@ -177,7 +183,8 @@ private List<AnalyticsChart> getGlobalMetadataAnalyticsCharts(Authentication aut
// Chart 4: Entities per fabric type
final List<NamedBar> entitiesPerEnv =
_analyticsService.getBarChart(_analyticsService.getAllEntityIndexName(), Optional.empty(),
ImmutableList.of("origin.keyword"), Collections.emptyMap(), Optional.empty(), false);
ImmutableList.of("origin.keyword"), Collections.emptyMap(),
ImmutableMap.of("removed", ImmutableList.of("true")), Optional.empty(), false);
if (entitiesPerEnv.size() > 1) {
charts.add(BarChart.builder().setTitle("Entities per Environment").setBars(entitiesPerEnv).build());
}
Expand Down
Loading

0 comments on commit d328a93

Please sign in to comment.