Merge branch 'master' into Fivetran-connector-performance-optimization

datahub-project · Jun 5, 2024 · 578d7f6 · 578d7f6
2 parents bc42e87 + 4a4d41c
commit 578d7f6
Show file tree

Hide file tree

Showing 36 changed files with 1,957 additions and 885 deletions.
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java
@@ -239,6 +239,9 @@ public static Filter viewFilter(
       return null;
     }
     DataHubViewInfo viewInfo = resolveView(opContext, viewService, UrnUtils.getUrn(viewUrn));
+    if (viewInfo == null) {
+      return null;
+    }
     Filter result = SearchUtils.combineFilters(null, viewInfo.getDefinition().getFilter());
     return result;
   }

diff --git a/.../java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java b/.../java/com/linkedin/datahub/graphql/types/mappers/UrnSearchAcrossLineageResultsMapper.java
@@ -71,6 +71,7 @@ private SearchAcrossLineageResult mapResult(
         .setDegrees(new ArrayList<>(searchEntity.getDegrees()))
         .setExplored(Boolean.TRUE.equals(searchEntity.isExplored()))
         .setIgnoredAsHop(Boolean.TRUE.equals(searchEntity.isIgnoredAsHop()))
+        .setTruncatedChildren(Boolean.TRUE.equals(searchEntity.isTruncatedChildren()))
         .build();
   }
 }
diff --git a/datahub-graphql-core/src/main/resources/search.graphql b/datahub-graphql-core/src/main/resources/search.graphql
@@ -747,6 +747,11 @@ type SearchAcrossLineageResult {
   """
   explored: Boolean!
 
+  """
+  Indicates this destination node has additional unexplored child relationships
+  """
+  truncatedChildren: Boolean!
+
   """
   Whether this relationship was ignored as a hop
   """

diff --git a/metadata-ingestion/docs/sources/iceberg/iceberg.md b/metadata-ingestion/docs/sources/iceberg/iceberg.md
@@ -10,7 +10,7 @@ This ingestion source maps the following Source System Concepts to DataHub Conce
 | Source Concept | DataHub Concept | Notes |
 | -- | -- | -- |
 | `iceberg` | [Data Platform](docs/generated/metamodel/entities/dataPlatform.md) | |
-| Table | [Dataset](docs/generated/metamodel/entities/dataset.md) | Each Iceberg table maps to a Dataset named using the parent folders.  If a table is stored under `my/namespace/table`, the dataset name will be `my.namespace.table`.  If a [Platform Instance](https://datahubproject.io/docs/platform-instances/) is configured, it will be used as a prefix: `<platform_instance>.my.namespace.table`. |
+| Table | [Dataset](docs/generated/metamodel/entities/dataset.md) | An Iceberg table is registered inside a catalog using a name, where the catalog is responsible for creating, dropping and renaming tables.  Catalogs manage a collection of tables that are usually grouped into namespaces.  The name of a table (for example, `my.namespace.table`) is mapped to a Dataset name.  If a [Platform Instance](https://datahubproject.io/docs/platform-instances/) is configured, it will be used as a prefix: `<platform_instance>.my.namespace.table`. |
 | [Table property](https://iceberg.apache.org/docs/latest/configuration/#table-properties) | [User (a.k.a. CorpUser)](docs/generated/metamodel/entities/corpuser.md) | The value of a table property can be used as the name of a CorpUser owner.  This table property name can be configured with the source option `user_ownership_property`. |
 | [Table property](https://iceberg.apache.org/docs/latest/configuration/#table-properties) | CorpGroup | The value of a table property can be used as the name of a CorpGroup owner.  This table property name can be configured with the source option `group_ownership_property`. |
 | Table parent folders (excluding [warehouse catalog location](https://iceberg.apache.org/docs/latest/configuration/#catalog-properties)) | Container | Available in a future release | 

diff --git a/metadata-ingestion/docs/sources/iceberg/iceberg_recipe.yml b/metadata-ingestion/docs/sources/iceberg/iceberg_recipe.yml
@@ -3,23 +3,30 @@ source:
   config:
     env: PROD
     catalog:
-      name: my_iceberg_catalog
-      type: rest
-      # Catalog configuration follows pyiceberg's documentation (https://py.iceberg.apache.org/configuration)
-      config:
+      # REST catalog configuration example using S3 storage
+      my_rest_catalog:
+        type: rest
+        # Catalog configuration follows pyiceberg's documentation (https://py.iceberg.apache.org/configuration)
         uri: http://localhost:8181
         s3.access-key-id: admin
         s3.secret-access-key: password
         s3.region: us-east-1
         warehouse: s3a://warehouse/wh/
         s3.endpoint: http://localhost:9000
-    platform_instance: my_iceberg_catalog
+      # SQL catalog configuration example using Azure datalake storage and a PostgreSQL database
+      # my_sql_catalog:
+      #   type: sql
+      #   uri: postgresql+psycopg2://user:password@sqldatabase.postgres.database.azure.com:5432/icebergcatalog
+      #   adlfs.tenant-id: <Azure tenant ID>
+      #   adlfs.account-name: <Azure storage account name>
+      #   adlfs.client-id: <Azure Client/Application ID>
+      #   adlfs.client-secret: <Azure Client Secret>
+    platform_instance: my_rest_catalog
     table_pattern:
       allow:
         - marketing.*
     profiling:
       enabled: true
 
 sink:
-  # sink configs
-
+  # sink configs