From 4296373c5fc8fcd7c6f67e856810dccd4a5046e1 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 27 Sep 2024 11:31:25 -0500 Subject: [PATCH 1/8] config(rest-api): enable authentication and api authorization by default (#11484) --- datahub-frontend/run/frontend.env | 2 +- docker/datahub-frontend/env/docker.env | 4 +- docker/datahub-gms/env/docker.env | 4 +- docker/docker-compose-with-cassandra.yml | 2 + .../docker-compose-without-neo4j.override.yml | 1 + ...ompose-without-neo4j.postgres.override.yml | 1 + docker/docker-compose-without-neo4j.yml | 1 + docker/docker-compose.dev.yml | 2 +- docker/docker-compose.override.yml | 1 + docker/docker-compose.yml | 1 + docker/profiles/docker-compose.gms.yml | 1 + .../docker-compose-m1.quickstart.yml | 2 +- ...er-compose-without-neo4j-m1.quickstart.yml | 1 + ...ocker-compose-without-neo4j.quickstart.yml | 1 + .../quickstart/docker-compose.quickstart.yml | 2 +- docs/how/updating-datahub.md | 3 + .../structuredproperties.py | 101 +++-- .../src/datahub/cli/timeline_cli.py | 5 +- .../kafka/MceConsumerApplicationTest.java | 8 +- .../src/main/resources/application.yaml | 8 +- .../GlobalControllerExceptionHandler.java | 7 + smoke-test/conftest.py | 39 ++ smoke-test/requests_wrapper/__init__.py | 4 - smoke-test/requests_wrapper/constants.py | 0 .../utils_requests_wrapper.py | 27 -- smoke-test/run-quickstart.sh | 4 +- smoke-test/test_e2e.py | 304 ++++++--------- smoke-test/test_rapid.py | 26 +- .../tests/assertions/assertions_test.py | 67 ++-- .../assertions/custom_assertions_test.py | 42 +- smoke-test/tests/browse/browse_test.py | 29 +- smoke-test/tests/cli/datahub_cli.py | 46 ++- smoke-test/tests/cli/datahub_graph_test.py | 47 +-- .../cli/delete_cmd/test_timeseries_delete.py | 46 ++- .../ingest_cmd/test_timeseries_rollback.py | 33 +- .../cli/user_groups_cmd/test_group_cmd.py | 44 ++- .../cli/user_groups_cmd/test_user_cmd.py | 28 +- smoke-test/tests/conftest.py | 32 -- .../tests/containers/containers_test.py | 56 ++- smoke-test/tests/cypress/integration_test.py | 40 +- .../tests/dataproduct/test_dataproduct.py | 64 ++- smoke-test/tests/delete/delete_test.py | 35 +- .../tests/deprecation/deprecation_test.py | 47 +-- smoke-test/tests/domains/domains_test.py | 69 ++-- smoke-test/tests/incidents/incidents_test.py | 55 +-- smoke-test/tests/lineage/test_lineage.py | 148 ++++--- .../managed_ingestion_test.py | 120 +++--- smoke-test/tests/openapi/test_openapi.py | 49 +-- smoke-test/tests/openapi/v1/timeline.json | 6 +- smoke-test/tests/openapi/v2/timeline.json | 6 +- smoke-test/tests/patch/common_patch_tests.py | 325 +++++++-------- .../tests/patch/test_datajob_patches.py | 131 ++++--- .../tests/patch/test_dataset_patches.py | 369 +++++++++--------- smoke-test/tests/policies/test_policies.py | 63 ++- .../tests/privileges/test_privileges.py | 23 +- smoke-test/tests/read_only/test_analytics.py | 20 +- .../tests/read_only/test_ingestion_list.py | 7 +- smoke-test/tests/read_only/test_policies.py | 7 +- smoke-test/tests/read_only/test_search.py | 26 +- .../tests/read_only/test_services_up.py | 12 +- .../tests/schema_fields/schema_evolution.py | 125 +++--- .../tests/schema_fields/test_schemafields.py | 28 +- .../setup/lineage/ingest_data_job_change.py | 10 +- .../lineage/ingest_dataset_join_change.py | 12 +- .../lineage/ingest_input_datasets_change.py | 12 +- .../setup/lineage/ingest_time_lineage.py | 15 +- smoke-test/tests/setup/lineage/utils.py | 6 +- .../test_structured_properties.py | 204 +++++----- 
.../tags_and_terms/tags_and_terms_test.py | 151 ++++--- smoke-test/tests/telemetry/telemetry_test.py | 17 +- smoke-test/tests/test_stateful_ingestion.py | 21 +- smoke-test/tests/tests/tests_test.py | 89 ++--- smoke-test/tests/timeline/timeline_test.py | 133 +++++-- .../tokens/revokable_access_token_test.py | 104 ++--- smoke-test/tests/utils.py | 153 ++++++-- smoke-test/tests/views/views_test.py | 80 ++-- 76 files changed, 1864 insertions(+), 1950 deletions(-) create mode 100644 smoke-test/conftest.py delete mode 100644 smoke-test/requests_wrapper/__init__.py delete mode 100644 smoke-test/requests_wrapper/constants.py delete mode 100644 smoke-test/requests_wrapper/utils_requests_wrapper.py delete mode 100644 smoke-test/tests/conftest.py diff --git a/datahub-frontend/run/frontend.env b/datahub-frontend/run/frontend.env index 2c92febfcfed1..4b32fb64e1008 100644 --- a/datahub-frontend/run/frontend.env +++ b/datahub-frontend/run/frontend.env @@ -44,7 +44,7 @@ ELASTIC_CLIENT_PORT=9200 # AUTH_JAAS_ENABLED=false # Change to disable Metadata Service Authentication -METADATA_SERVICE_AUTH_ENABLED=true +# METADATA_SERVICE_AUTH_ENABLED=false # Change to override max header count defaults DATAHUB_AKKA_MAX_HEADER_COUNT=64 diff --git a/docker/datahub-frontend/env/docker.env b/docker/datahub-frontend/env/docker.env index 7e490813c93cf..74e7680f1b5ec 100644 --- a/docker/datahub-frontend/env/docker.env +++ b/docker/datahub-frontend/env/docker.env @@ -16,8 +16,8 @@ JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf # SSL_TRUSTSTORE_TYPE=jks # SSL_TRUSTSTORE_PASSWORD=MyTruststorePassword -# Uncomment to enable Metadata Service Authentication -# METADATA_SERVICE_AUTH_ENABLED=true +# Uncomment to disable Metadata Service Authentication +# METADATA_SERVICE_AUTH_ENABLED=false # Uncomment & populate these configs to enable OIDC SSO in React application. 
# Required OIDC configs diff --git a/docker/datahub-gms/env/docker.env b/docker/datahub-gms/env/docker.env index 59fc4bdde02ff..9a2ce30988805 100644 --- a/docker/datahub-gms/env/docker.env +++ b/docker/datahub-gms/env/docker.env @@ -27,8 +27,8 @@ MCE_CONSUMER_ENABLED=true PE_CONSUMER_ENABLED=true UI_INGESTION_ENABLED=true -# Uncomment to enable Metadata Service Authentication -METADATA_SERVICE_AUTH_ENABLED=false +# Uncomment to disable Metadata Service Authentication +# METADATA_SERVICE_AUTH_ENABLED=false # Uncomment to disable persistence of client-side analytics events # DATAHUB_ANALYTICS_ENABLED=false diff --git a/docker/docker-compose-with-cassandra.yml b/docker/docker-compose-with-cassandra.yml index de766f76cb626..6709aee98d697 100644 --- a/docker/docker-compose-with-cassandra.yml +++ b/docker/docker-compose-with-cassandra.yml @@ -39,6 +39,8 @@ services: context: ../ dockerfile: docker/datahub-gms/Dockerfile env_file: ./datahub-gms/env/docker.cassandra.env + environment: + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 20s diff --git a/docker/docker-compose-without-neo4j.override.yml b/docker/docker-compose-without-neo4j.override.yml index 09a27fb4573de..11d7cd0c0c87b 100644 --- a/docker/docker-compose-without-neo4j.override.yml +++ b/docker/docker-compose-without-neo4j.override.yml @@ -6,6 +6,7 @@ services: environment: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} depends_on: datahub-upgrade: condition: service_completed_successfully diff --git a/docker/docker-compose-without-neo4j.postgres.override.yml b/docker/docker-compose-without-neo4j.postgres.override.yml index dd7590ffe09b9..b81fb6435c297 100644 --- a/docker/docker-compose-without-neo4j.postgres.override.yml +++ b/docker/docker-compose-without-neo4j.postgres.override.yml @@ -9,6 +9,7 @@ services: environment: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} depends_on: datahub-upgrade: condition: service_completed_successfully diff --git a/docker/docker-compose-without-neo4j.yml b/docker/docker-compose-without-neo4j.yml index 748a2cc9e0416..53fcc77c6e8f3 100644 --- a/docker/docker-compose-without-neo4j.yml +++ b/docker/docker-compose-without-neo4j.yml @@ -42,6 +42,7 @@ services: env_file: datahub-gms/env/docker-without-neo4j.env environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s diff --git a/docker/docker-compose.dev.yml b/docker/docker-compose.dev.yml index 7974b66ec87db..2202f362abd99 100644 --- a/docker/docker-compose.dev.yml +++ b/docker/docker-compose.dev.yml @@ -40,7 +40,7 @@ services: - SKIP_ELASTICSEARCH_CHECK=false - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-dev} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - - METADATA_SERVICE_AUTH_ENABLED=false + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001 - 
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE=false - SEARCH_SERVICE_ENABLE_CACHE=false diff --git a/docker/docker-compose.override.yml b/docker/docker-compose.override.yml index 0aae870078be1..51fbe0060aa5f 100644 --- a/docker/docker-compose.override.yml +++ b/docker/docker-compose.override.yml @@ -8,6 +8,7 @@ services: - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart} - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true} - GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} volumes: - ${HOME}/.datahub/plugins:/etc/datahub/plugins datahub-upgrade: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index ae55861580bec..5430a8a6fcd5b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -35,6 +35,7 @@ services: image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head} environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} ports: - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080 build: diff --git a/docker/profiles/docker-compose.gms.yml b/docker/profiles/docker-compose.gms.yml index 6e3e5780506ac..d010d19dd954b 100644 --- a/docker/profiles/docker-compose.gms.yml +++ b/docker/profiles/docker-compose.gms.yml @@ -101,6 +101,7 @@ x-datahub-gms-service: &datahub-gms-service environment: &datahub-gms-env <<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env] ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml} + METADATA_SERVICE_AUTH_ENABLED: ${METADATA_SERVICE_AUTH_ENABLED:-true} healthcheck: test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health start_period: 90s diff --git a/docker/quickstart/docker-compose-m1.quickstart.yml b/docker/quickstart/docker-compose-m1.quickstart.yml index 834d55096468f..046ab96cf3002 100644 --- a/docker/quickstart/docker-compose-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-m1.quickstart.yml @@ -97,7 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true - - METADATA_SERVICE_AUTH_ENABLED=false + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - NEO4J_HOST=http://neo4j:7474 - NEO4J_PASSWORD=datahub - NEO4J_URI=bolt://neo4j diff --git a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml index 47fb50f78e4f0..6295572aac98f 100644 --- a/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j-m1.quickstart.yml @@ -97,6 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true healthcheck: diff --git a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml index 3fa13a9e56b42..ed5f203ff4d05 100644 --- a/docker/quickstart/docker-compose-without-neo4j.quickstart.yml +++ b/docker/quickstart/docker-compose-without-neo4j.quickstart.yml @@ -97,6 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - 
MCE_CONSUMER_ENABLED=true + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - PE_CONSUMER_ENABLED=true - UI_INGESTION_ENABLED=true healthcheck: diff --git a/docker/quickstart/docker-compose.quickstart.yml b/docker/quickstart/docker-compose.quickstart.yml index c63b6d1d61b03..66616be98bec1 100644 --- a/docker/quickstart/docker-compose.quickstart.yml +++ b/docker/quickstart/docker-compose.quickstart.yml @@ -97,7 +97,7 @@ services: - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081 - MAE_CONSUMER_ENABLED=true - MCE_CONSUMER_ENABLED=true - - METADATA_SERVICE_AUTH_ENABLED=false + - METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false} - NEO4J_HOST=http://neo4j:7474 - NEO4J_PASSWORD=datahub - NEO4J_URI=bolt://neo4j diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 37e21d2395629..ea53c2b470060 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes +- Metadata service authentication enabled by default +- Rest API authorization enabled by default + ### Potential Downtime ### Deprecations diff --git a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py index 44fd32d5a426b..5b188edf9563b 100644 --- a/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py +++ b/metadata-ingestion/src/datahub/api/entities/structuredproperties/structuredproperties.py @@ -94,60 +94,59 @@ def urn_must_be_present(cls, v, values): return v @staticmethod - def create(file: str) -> None: - emitter: DataHubGraph - - with get_default_graph() as emitter: - with open(file) as fp: - structuredproperties: List[dict] = yaml.safe_load(fp) - for structuredproperty_raw in structuredproperties: - structuredproperty = StructuredProperties.parse_obj( - structuredproperty_raw + def create(file: str, graph: Optional[DataHubGraph] = None) -> None: + emitter: DataHubGraph = graph if graph else get_default_graph() + + with open(file) as fp: + structuredproperties: List[dict] = yaml.safe_load(fp) + for structuredproperty_raw in structuredproperties: + structuredproperty = StructuredProperties.parse_obj( + structuredproperty_raw + ) + if not structuredproperty.type.islower(): + structuredproperty.type = structuredproperty.type.lower() + logger.warn( + f"Structured property type should be lowercase. Updated to {structuredproperty.type}" ) - if not structuredproperty.type.islower(): - structuredproperty.type = structuredproperty.type.lower() - logger.warn( - f"Structured property type should be lowercase. Updated to {structuredproperty.type}" - ) - if not AllowedTypes.check_allowed_type(structuredproperty.type): - raise ValueError( - f"Type {structuredproperty.type} is not allowed. 
Allowed types are {AllowedTypes.values()}" - ) - mcp = MetadataChangeProposalWrapper( - entityUrn=structuredproperty.urn, - aspect=StructuredPropertyDefinitionClass( - qualifiedName=structuredproperty.fqn, - valueType=Urn.make_data_type_urn(structuredproperty.type), - displayName=structuredproperty.display_name, - description=structuredproperty.description, - entityTypes=[ - Urn.make_entity_type_urn(entity_type) - for entity_type in structuredproperty.entity_types or [] - ], - cardinality=structuredproperty.cardinality, - immutable=structuredproperty.immutable, - allowedValues=( - [ - PropertyValueClass( - value=v.value, description=v.description - ) - for v in structuredproperty.allowed_values - ] - if structuredproperty.allowed_values - else None - ), - typeQualifier=( - { - "allowedTypes": structuredproperty.type_qualifier.allowed_types - } - if structuredproperty.type_qualifier - else None - ), - ), + if not AllowedTypes.check_allowed_type(structuredproperty.type): + raise ValueError( + f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}" ) - emitter.emit_mcp(mcp) + mcp = MetadataChangeProposalWrapper( + entityUrn=structuredproperty.urn, + aspect=StructuredPropertyDefinitionClass( + qualifiedName=structuredproperty.fqn, + valueType=Urn.make_data_type_urn(structuredproperty.type), + displayName=structuredproperty.display_name, + description=structuredproperty.description, + entityTypes=[ + Urn.make_entity_type_urn(entity_type) + for entity_type in structuredproperty.entity_types or [] + ], + cardinality=structuredproperty.cardinality, + immutable=structuredproperty.immutable, + allowedValues=( + [ + PropertyValueClass( + value=v.value, description=v.description + ) + for v in structuredproperty.allowed_values + ] + if structuredproperty.allowed_values + else None + ), + typeQualifier=( + { + "allowedTypes": structuredproperty.type_qualifier.allowed_types + } + if structuredproperty.type_qualifier + else None + ), + ), + ) + emitter.emit_mcp(mcp) - logger.info(f"Created structured property {structuredproperty.urn}") + logger.info(f"Created structured property {structuredproperty.urn}") @classmethod def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties": diff --git a/metadata-ingestion/src/datahub/cli/timeline_cli.py b/metadata-ingestion/src/datahub/cli/timeline_cli.py index 63e05aa65d9a5..08672528abb5d 100644 --- a/metadata-ingestion/src/datahub/cli/timeline_cli.py +++ b/metadata-ingestion/src/datahub/cli/timeline_cli.py @@ -9,7 +9,7 @@ from termcolor import colored from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key -from datahub.ingestion.graph.client import get_default_graph +from datahub.ingestion.graph.client import DataHubGraph, get_default_graph from datahub.telemetry import telemetry from datahub.upgrade import upgrade from datahub.utilities.urns.urn import Urn @@ -62,8 +62,9 @@ def get_timeline( start_time: Optional[int], end_time: Optional[int], diff: bool, + graph: Optional[DataHubGraph] = None, ) -> Any: - client = get_default_graph() + client = graph if graph else get_default_graph() session = client._session host = client.config.server if urn.startswith("urn%3A"): diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java index 30bfeadb021a7..228b8aed6e1dd 100644 --- 
a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java @@ -2,7 +2,8 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.when; -import static org.testng.AssertJUnit.*; +import static org.testng.Assert.assertNotNull; +import static org.testng.Assert.assertTrue; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; @@ -19,7 +20,8 @@ @ActiveProfiles("test") @SpringBootTest( webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT, - classes = {MceConsumerApplication.class, MceConsumerApplicationTestConfiguration.class}) + classes = {MceConsumerApplication.class, MceConsumerApplicationTestConfiguration.class}, + properties = {"authentication.enabled=false"}) public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests { @Autowired private TestRestTemplate restTemplate; @@ -38,7 +40,7 @@ public void testRestliServletConfig() { String response = this.restTemplate.postForObject( "/gms/aspects?action=restoreIndices", "{\"urn\":\"\"}", String.class); - assertTrue(response.contains(mockResult.toString())); + assertTrue(response.contains(mockResult.toString()), String.format("Found: %s", response)); } @Test diff --git a/metadata-service/configuration/src/main/resources/application.yaml b/metadata-service/configuration/src/main/resources/application.yaml index e8d5be9bfb8f1..0ce0b976c976e 100644 --- a/metadata-service/configuration/src/main/resources/application.yaml +++ b/metadata-service/configuration/src/main/resources/application.yaml @@ -3,8 +3,8 @@ baseUrl: ${DATAHUB_BASE_URL:http://localhost:9002} # App Layer authentication: - # Enable if you want all requests to the Metadata Service to be authenticated. Disabled by default. - enabled: ${METADATA_SERVICE_AUTH_ENABLED:false} + # Enable if you want all requests to the Metadata Service to be authenticated. + enabled: ${METADATA_SERVICE_AUTH_ENABLED:true} # Required if enabled is true! A configurable chain of Authenticators authenticators: @@ -43,8 +43,8 @@ authorization: enabled: ${AUTH_POLICIES_ENABLED:true} cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120} cachePolicyFetchSize: ${POLICY_CACHE_FETCH_SIZE:1000} - # Enables authorization of reads, writes, and deletes on REST APIs. Defaults to false for backwards compatibility, but should become true down the road - restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:false} + # Enables authorization of reads, writes, and deletes on REST APIs. 
+ restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:true} view: enabled: ${VIEW_AUTHORIZATION_ENABLED:false} recommendations: diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java index dc4726900a1c3..ba0a426fa20e8 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/GlobalControllerExceptionHandler.java @@ -2,6 +2,7 @@ import com.linkedin.metadata.dao.throttle.APIThrottleException; import io.datahubproject.openapi.exception.InvalidUrnException; +import io.datahubproject.openapi.exception.UnauthorizedException; import java.util.Map; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.ConversionNotSupportedException; @@ -45,4 +46,10 @@ public static ResponseEntity> handleThrottleException( return new ResponseEntity<>( Map.of("error", e.getMessage()), headers, HttpStatus.TOO_MANY_REQUESTS); } + + @ExceptionHandler(UnauthorizedException.class) + public static ResponseEntity> handleUnauthorizedException( + UnauthorizedException e) { + return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN); + } } diff --git a/smoke-test/conftest.py b/smoke-test/conftest.py new file mode 100644 index 0000000000000..69a58cd766182 --- /dev/null +++ b/smoke-test/conftest.py @@ -0,0 +1,39 @@ +import os + +import pytest +import requests +from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph + +from tests.test_result_msg import send_message +from tests.utils import ( + TestSessionWrapper, + get_frontend_session, + wait_for_healthcheck_util, +) + +# Disable telemetry +os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" + + +@pytest.fixture(scope="session") +def auth_session(): + wait_for_healthcheck_util(requests) + auth_session = TestSessionWrapper(get_frontend_session()) + yield auth_session + auth_session.destroy() + + +@pytest.fixture(scope="session") +def graph_client(auth_session) -> DataHubGraph: + print(auth_session.cookies) + graph: DataHubGraph = DataHubGraph( + config=DatahubClientConfig( + server=auth_session.gms_url(), token=auth_session.gms_token() + ) + ) + return graph + + +def pytest_sessionfinish(session, exitstatus): + """whole test run finishes.""" + send_message(exitstatus) diff --git a/smoke-test/requests_wrapper/__init__.py b/smoke-test/requests_wrapper/__init__.py deleted file mode 100644 index c2f4190e6150d..0000000000000 --- a/smoke-test/requests_wrapper/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .utils_requests_wrapper import CustomSession as Session -from .utils_requests_wrapper import get, post -from .constants import * -from requests import exceptions diff --git a/smoke-test/requests_wrapper/constants.py b/smoke-test/requests_wrapper/constants.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/smoke-test/requests_wrapper/utils_requests_wrapper.py b/smoke-test/requests_wrapper/utils_requests_wrapper.py deleted file mode 100644 index d6bf0d4af011e..0000000000000 --- a/smoke-test/requests_wrapper/utils_requests_wrapper.py +++ /dev/null @@ -1,27 +0,0 @@ -import requests -from tests.consistency_utils import wait_for_writes_to_sync - - -class CustomSession(requests.Session): - """ - Create a custom session to add consistency delay on writes - """ - - def post(self, *args, 
**kwargs): - response = super(CustomSession, self).post(*args, **kwargs) - if "/logIn" not in args[0]: - print("sleeping.") - wait_for_writes_to_sync() - return response - - -def post(*args, **kwargs): - response = requests.post(*args, **kwargs) - if "/logIn" not in args[0]: - print("sleeping.") - wait_for_writes_to_sync() - return response - - -def get(*args, **kwargs): - return requests.get(*args, **kwargs) diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index eb0d46b317244..e83a116c670a4 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -17,10 +17,10 @@ XPACK_SECURITY_ENABLED="${XPACK_SECURITY_ENABLED:=plugins.security.disabled=true ELASTICSEARCH_USE_SSL="${ELASTICSEARCH_USE_SSL:=false}" USE_AWS_ELASTICSEARCH="${USE_AWS_ELASTICSEARCH:=true}" -DATAHUB_TELEMETRY_ENABLED=false \ +DATAHUB_TELEMETRY_ENABLED=false \ DOCKER_COMPOSE_BASE="file://$( dirname "$DIR" )" \ DATAHUB_SEARCH_IMAGE="$DATAHUB_SEARCH_IMAGE" DATAHUB_SEARCH_TAG="$DATAHUB_SEARCH_TAG" \ XPACK_SECURITY_ENABLED="$XPACK_SECURITY_ENABLED" ELASTICSEARCH_USE_SSL="$ELASTICSEARCH_USE_SSL" \ USE_AWS_ELASTICSEARCH="$USE_AWS_ELASTICSEARCH" \ -DATAHUB_VERSION=${DATAHUB_VERSION} \ +DATAHUB_VERSION=${DATAHUB_VERSION} \ docker compose --project-directory ../docker/profiles --profile quickstart-consumers up -d --quiet-pull --wait --wait-timeout 900 diff --git a/smoke-test/test_e2e.py b/smoke-test/test_e2e.py index 74d64a8193173..51559a6bcdff0 100644 --- a/smoke-test/test_e2e.py +++ b/smoke-test/test_e2e.py @@ -1,23 +1,24 @@ +import logging import time import urllib from http import HTTPStatus from typing import Any, Optional +import concurrent.futures import pytest -import requests_wrapper as requests +import requests import tenacity from datahub.ingestion.run.pipeline import Pipeline +logger = logging.getLogger(__name__) + pytestmark = pytest.mark.no_cypress_suite1 from tests.utils import ( - get_frontend_url, - get_gms_url, get_kafka_broker_url, get_kafka_schema_registry, get_sleep_info, ingest_file_via_rest, - wait_for_healthcheck_util, get_frontend_session, get_admin_credentials, get_root_urn, @@ -37,29 +38,12 @@ sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - -@pytest.fixture(scope="session") -def frontend_session(wait_for_healthchecks): - yield get_frontend_session() - - @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_user_present(urn: str): - response = requests.get( - f"{get_gms_url()}/entities/{urllib.parse.quote(urn)}", +def _ensure_user_present(auth_session, urn: str): + response = auth_session.get( + f"{auth_session.gms_url()}/entities/{urllib.parse.quote(urn)}", headers={ **restli_default_headers, }, @@ -77,7 +61,7 @@ def _ensure_user_present(urn: str): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_user_relationship_present(frontend_session, urn, relationships): +def _ensure_user_relationship_present(auth_session, urn, relationships): json = { "query": """query corpUser($urn: String!) 
{\n corpUser(urn: $urn) {\n @@ -89,7 +73,7 @@ def _ensure_user_relationship_present(frontend_session, urn, relationships): }""", "variables": {"urn": urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -104,11 +88,12 @@ def _ensure_user_relationship_present(frontend_session, urn, relationships): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_dataset_present( + auth_session: Any, urn: str, aspects: Optional[str] = "datasetProperties", ) -> Any: - response = requests.get( - f"{get_gms_url()}/entitiesV2?ids=List({urllib.parse.quote(urn)})&aspects=List({aspects})", + response = auth_session.get( + f"{auth_session.gms_url()}/entitiesV2?ids=List({urllib.parse.quote(urn)})&aspects=List({aspects})", headers={ **restli_default_headers, "X-RestLi-Method": "batch_get", @@ -125,7 +110,7 @@ def _ensure_dataset_present( @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_group_not_present(urn: str, frontend_session) -> Any: +def _ensure_group_not_present(auth_session, urn: str) -> Any: json = { "query": """query corpGroup($urn: String!) {\n corpGroup(urn: $urn) {\n @@ -137,7 +122,7 @@ def _ensure_group_not_present(urn: str, frontend_session) -> Any: }""", "variables": {"urn": urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -147,21 +132,16 @@ def _ensure_group_not_present(urn: str, frontend_session) -> Any: assert res_data["data"]["corpGroup"]["properties"] is None -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_ingestion_via_rest(wait_for_healthchecks): - ingest_file_via_rest(bootstrap_sample_data) - _ensure_user_present(urn=get_root_urn()) - wait_for_writes_to_sync() +def fixture_ingestion_via_rest(auth_session): + ingest_file_via_rest(auth_session, bootstrap_sample_data) + _ensure_user_present(auth_session, urn=get_root_urn()) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_ingestion_usage_via_rest(wait_for_healthchecks): - ingest_file_via_rest(usage_sample_data) - wait_for_writes_to_sync() +def fixture_ingestion_usage_via_rest(auth_session): + ingest_file_via_rest(auth_session, usage_sample_data) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_ingestion_via_kafka(wait_for_healthchecks): +def fixture_ingestion_via_kafka(auth_session): pipeline = Pipeline.create( { "source": { @@ -181,34 +161,38 @@ def test_ingestion_via_kafka(wait_for_healthchecks): ) pipeline.run() pipeline.raise_from_status() - _ensure_dataset_present( + _ensure_dataset_present(auth_session, "urn:li:dataset:(urn:li:dataPlatform:bigquery,bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times,PROD)" ) # Since Kafka emission is asynchronous, we must wait a little bit so that # the changes are actually processed. 
time.sleep(kafka_post_ingestion_wait_sec) - wait_for_writes_to_sync() -@pytest.mark.dependency( - depends=[ - "test_ingestion_via_rest", - "test_ingestion_via_kafka", - "test_ingestion_usage_via_rest", - ] -) -def test_run_ingestion(wait_for_healthchecks): +@pytest.fixture(scope='module', autouse=True) +def test_run_ingestion(auth_session): # Dummy test so that future ones can just depend on this one. + + # The rest sink fixtures cannot run at the same time, limitation of the Pipeline code + fixture_ingestion_usage_via_rest(auth_session) + + with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor: + futures = [] + for ingestion_fixture in ["fixture_ingestion_via_kafka", "fixture_ingestion_via_rest"]: + futures.append( + executor.submit(globals()[ingestion_fixture], auth_session) + ) + + for future in concurrent.futures.as_completed(futures): + logger.info(future.result()) wait_for_writes_to_sync() - pass -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_user(): +def test_gms_get_user(auth_session): username = "jdoe" urn = f"urn:li:corpuser:{username}" - _ensure_user_present(urn=urn) + _ensure_user_present(auth_session, urn=urn) @pytest.mark.parametrize( @@ -228,8 +212,7 @@ def test_gms_get_user(): ), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_dataset(platform, dataset_name, env): +def test_gms_get_dataset(auth_session, platform, dataset_name, env): platform = "urn:li:dataPlatform:bigquery" dataset_name = ( "bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times" @@ -237,8 +220,8 @@ def test_gms_get_dataset(platform, dataset_name, env): env = "PROD" urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - response = requests.get( - f"{get_gms_url()}/entities/{urllib.parse.quote(urn)}", + response = auth_session.get( + f"{auth_session.gms_url()}/entities/{urllib.parse.quote(urn)}", headers={ **restli_default_headers, "X-RestLi-Method": "get", @@ -255,8 +238,7 @@ def test_gms_get_dataset(platform, dataset_name, env): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_batch_get_v2(): +def test_gms_batch_get_v2(auth_session): platform = "urn:li:dataPlatform:bigquery" env = "PROD" name_1 = "bigquery-public-data.covid19_geotab_mobility_impact.us_border_wait_times" @@ -264,10 +246,10 @@ def test_gms_batch_get_v2(): urn1 = f"urn:li:dataset:({platform},{name_1},{env})" urn2 = f"urn:li:dataset:({platform},{name_2},{env})" - resp1 = _ensure_dataset_present(urn1, aspects="datasetProperties,ownership") + resp1 = _ensure_dataset_present(auth_session, urn1, aspects="datasetProperties,ownership") assert resp1["results"][urn1]["aspects"]["ownership"] - resp2 = _ensure_dataset_present(urn2, aspects="datasetProperties,ownership") + resp2 = _ensure_dataset_present(auth_session, urn2, aspects="datasetProperties,ownership") assert ( "ownership" not in resp2["results"][urn2]["aspects"] ) # Aspect does not exist. 
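The hunks above illustrate the refactor applied throughout test_e2e.py and the other smoke tests: module-level requests calls and the old frontend_session/wait_for_healthchecks fixtures give way to the session-scoped auth_session fixture from the new conftest.py, which carries the credentials that the metadata service now requires by default. A minimal sketch of a test written against the new fixture, assuming the auth_session wrapper and the X-RestLi-Protocol-Version header shown elsewhere in this patch (the query payload and assertion are illustrative only):

restli_default_headers = {"X-RestLi-Protocol-Version": "2.0.0"}

def test_search_with_auth_session(auth_session):
    # auth_session (the TestSessionWrapper from conftest.py) carries the
    # credentials needed now that METADATA_SERVICE_AUTH_ENABLED defaults to true.
    response = auth_session.post(
        f"{auth_session.gms_url()}/entities?action=search",
        headers=restli_default_headers,
        json={"input": "covid", "entity": "dataset", "start": 0, "count": 10},
    )
    response.raise_for_status()
    assert response.json()["value"]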
@@ -280,13 +262,12 @@ def test_gms_batch_get_v2(): ("sample", 3), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_search_dataset(query, min_expected_results): +def test_gms_search_dataset(auth_session, query, min_expected_results): json = {"input": f"{query}", "entity": "dataset", "start": 0, "count": 10} print(json) - response = requests.post( - f"{get_gms_url()}/entities?action=search", + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=search", headers=restli_default_headers, json=json, ) @@ -305,13 +286,12 @@ def test_gms_search_dataset(query, min_expected_results): ("sample", 3), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_search_across_entities(query, min_expected_results): +def test_gms_search_across_entities(auth_session, query, min_expected_results): json = {"input": f"{query}", "entities": [], "start": 0, "count": 10} print(json) - response = requests.post( - f"{get_gms_url()}/entities?action=searchAcrossEntities", + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=searchAcrossEntities", headers=restli_default_headers, json=json, ) @@ -323,10 +303,9 @@ def test_gms_search_across_entities(query, min_expected_results): assert len(res_data["value"]["entities"]) >= min_expected_results -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_usage_fetch(): - response = requests.post( - f"{get_gms_url()}/usageStats?action=queryRange", +def test_gms_usage_fetch(auth_session): + response = auth_session.post( + f"{auth_session.gms_url()}/usageStats?action=queryRange", headers=restli_default_headers, json={ "resource": "urn:li:dataset:(urn:li:dataPlatform:bigquery,harshal-playground-306419.test_schema.excess_deaths_derived,PROD)", @@ -356,13 +335,11 @@ def test_gms_usage_fetch(): } -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_frontend_auth(frontend_session): +def test_frontend_auth(auth_session): pass -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_browse_datasets(frontend_session): +def test_frontend_browse_datasets(auth_session): json = { "query": """query browse($input: BrowseInput!) {\n @@ -384,7 +361,7 @@ def test_frontend_browse_datasets(frontend_session): "variables": {"input": {"type": "DATASET", "path": ["prod"]}}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -403,8 +380,7 @@ def test_frontend_browse_datasets(frontend_session): ("", 1), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_search_datasets(frontend_session, query, min_expected_results): +def test_frontend_search_datasets(auth_session, query, min_expected_results): json = { "query": """query search($input: SearchInput!) 
{\n @@ -427,7 +403,7 @@ def test_frontend_search_datasets(frontend_session, query, min_expected_results) }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -446,8 +422,7 @@ def test_frontend_search_datasets(frontend_session, query, min_expected_results) ("", 1), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_search_across_entities(frontend_session, query, min_expected_results): +def test_frontend_search_across_entities(auth_session, query, min_expected_results): json = { "query": """query searchAcrossEntities($input: SearchAcrossEntitiesInput!) {\n @@ -470,7 +445,7 @@ def test_frontend_search_across_entities(frontend_session, query, min_expected_r }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -484,8 +459,7 @@ def test_frontend_search_across_entities(frontend_session, query, min_expected_r ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_user_info(frontend_session): +def test_frontend_user_info(auth_session): urn = get_root_urn() json = { @@ -506,7 +480,7 @@ def test_frontend_user_info(frontend_session): }""", "variables": {"urn": urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -533,8 +507,7 @@ def test_frontend_user_info(frontend_session): ), ], ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_datasets(frontend_session, platform, dataset_name, env): +def test_frontend_datasets(auth_session, platform, dataset_name, env): urn = f"urn:li:dataset:({platform},{dataset_name},{env})" json = { "query": """query getDataset($urn: String!) {\n @@ -555,7 +528,7 @@ def test_frontend_datasets(frontend_session, platform, dataset_name, env): "variables": {"urn": urn}, } # Basic dataset info. 
- response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -567,10 +540,9 @@ def test_frontend_datasets(frontend_session, platform, dataset_name, env): assert res_data["data"]["dataset"]["platform"]["urn"] == platform -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_ingest_with_system_metadata(): - response = requests.post( - f"{get_gms_url()}/entities?action=ingest", +def test_ingest_with_system_metadata(auth_session, test_run_ingestion): + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=ingest", headers=restli_default_headers, json={ "entity": { @@ -600,10 +572,9 @@ def test_ingest_with_system_metadata(): response.raise_for_status() -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_ingest_with_blank_system_metadata(): - response = requests.post( - f"{get_gms_url()}/entities?action=ingest", +def test_ingest_with_blank_system_metadata(auth_session): + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=ingest", headers=restli_default_headers, json={ "entity": { @@ -630,10 +601,9 @@ def test_ingest_with_blank_system_metadata(): response.raise_for_status() -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_ingest_without_system_metadata(): - response = requests.post( - f"{get_gms_url()}/entities?action=ingest", +def test_ingest_without_system_metadata(auth_session): + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=ingest", headers=restli_default_headers, json={ "entity": { @@ -659,8 +629,7 @@ def test_ingest_without_system_metadata(): response.raise_for_status() -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_app_config(frontend_session): +def test_frontend_app_config(auth_session): json = { "query": """query appConfig {\n @@ -690,7 +659,7 @@ def test_frontend_app_config(frontend_session): }""" } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -701,8 +670,7 @@ def test_frontend_app_config(frontend_session): assert res_data["data"]["appConfig"]["policiesConfig"]["enabled"] is True -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_frontend_me_query(frontend_session): +def test_frontend_me_query(auth_session): json = { "query": """query me {\n @@ -731,7 +699,7 @@ def test_frontend_me_query(frontend_session): }""" } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -748,8 +716,7 @@ def test_frontend_me_query(frontend_session): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_list_users(frontend_session): +def test_list_users(auth_session): json = { "query": """query listUsers($input: ListUsersInput!) 
{\n @@ -774,7 +741,7 @@ def test_list_users(frontend_session): } }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -788,8 +755,8 @@ def test_list_users(frontend_session): ) # Length of default user set. -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_list_groups(frontend_session): +@pytest.mark.dependency() +def test_list_groups(auth_session): json = { "query": """query listGroups($input: ListGroupsInput!) {\n @@ -814,7 +781,7 @@ def test_list_groups(frontend_session): } }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -829,9 +796,9 @@ def test_list_groups(frontend_session): @pytest.mark.dependency( - depends=["test_healthchecks", "test_run_ingestion", "test_list_groups"] + depends=["test_list_groups"] ) -def test_add_remove_members_from_group(frontend_session): +def test_add_remove_members_from_group(auth_session): # Assert no group edges for user jdoe json = { @@ -845,7 +812,7 @@ def test_add_remove_members_from_group(frontend_session): }""", "variables": {"urn": "urn:li:corpuser:jdoe"}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -866,11 +833,11 @@ def test_add_remove_members_from_group(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() # Verify the member has been added - _ensure_user_relationship_present(frontend_session, "urn:li:corpuser:jdoe", 1) + _ensure_user_relationship_present(auth_session, "urn:li:corpuser:jdoe", 1) # Now remove jdoe from the group json = { @@ -884,15 +851,15 @@ def test_add_remove_members_from_group(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() # Verify the member has been removed - _ensure_user_relationship_present(frontend_session, "urn:li:corpuser:jdoe", 0) + _ensure_user_relationship_present(auth_session, "urn:li:corpuser:jdoe", 0) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_update_corp_group_properties(frontend_session): +@pytest.mark.dependency() +def test_update_corp_group_properties(auth_session): group_urn = "urn:li:corpGroup:bfoo" @@ -910,7 +877,7 @@ def test_update_corp_group_properties(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() print(res_data) @@ -931,7 +898,7 @@ def test_update_corp_group_properties(frontend_session): }""", "variables": {"urn": group_urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) 
response.raise_for_status() res_data = response.json() @@ -956,18 +923,16 @@ def test_update_corp_group_properties(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() @pytest.mark.dependency( depends=[ - "test_healthchecks", - "test_run_ingestion", "test_update_corp_group_properties", ] ) -def test_update_corp_group_description(frontend_session): +def test_update_corp_group_description(auth_session): group_urn = "urn:li:corpGroup:bfoo" @@ -980,7 +945,7 @@ def test_update_corp_group_description(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() print(res_data) @@ -999,7 +964,7 @@ def test_update_corp_group_description(frontend_session): }""", "variables": {"urn": group_urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1022,19 +987,17 @@ def test_update_corp_group_description(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() @pytest.mark.dependency( depends=[ - "test_healthchecks", - "test_run_ingestion", "test_list_groups", "test_add_remove_members_from_group", ] ) -def test_remove_user(frontend_session): +def test_remove_user(auth_session): json = { "query": """mutation removeUser($urn: String!) 
{\n @@ -1042,7 +1005,7 @@ def test_remove_user(frontend_session): "variables": {"urn": "urn:li:corpuser:jdoe"}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() json = { @@ -1056,7 +1019,7 @@ def test_remove_user(frontend_session): }""", "variables": {"urn": "urn:li:corpuser:jdoe"}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1069,13 +1032,11 @@ def test_remove_user(frontend_session): @pytest.mark.dependency( depends=[ - "test_healthchecks", - "test_run_ingestion", "test_list_groups", "test_add_remove_members_from_group", ] ) -def test_remove_group(frontend_session): +def test_remove_group(auth_session): group_urn = "urn:li:corpGroup:bfoo" json = { @@ -1084,21 +1045,19 @@ def test_remove_group(frontend_session): "variables": {"urn": group_urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() - _ensure_group_not_present(group_urn, frontend_session) + _ensure_group_not_present(auth_session, group_urn) @pytest.mark.dependency( depends=[ - "test_healthchecks", - "test_run_ingestion", "test_list_groups", "test_remove_group", ] ) -def test_create_group(frontend_session): +def test_create_group(auth_session): json = { "query": """mutation createGroup($input: CreateGroupInput!) {\n @@ -1112,7 +1071,7 @@ def test_create_group(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() json = { @@ -1126,7 +1085,7 @@ def test_create_group(frontend_session): }""", "variables": {"urn": "urn:li:corpGroup:test-id"}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1136,8 +1095,7 @@ def test_create_group(frontend_session): assert res_data["data"]["corpGroup"]["properties"]["displayName"] == "Test Group" -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_home_page_recommendations(frontend_session): +def test_home_page_recommendations(auth_session): min_expected_recommendation_modules = 0 @@ -1153,7 +1111,7 @@ def test_home_page_recommendations(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() print(res_data) @@ -1168,8 +1126,7 @@ def test_home_page_recommendations(frontend_session): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_search_results_recommendations(frontend_session): +def test_search_results_recommendations(auth_session): # This test simply ensures that the recommendations endpoint does not return an error. 
json = { @@ -1187,7 +1144,7 @@ def test_search_results_recommendations(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1195,8 +1152,7 @@ def test_search_results_recommendations(frontend_session): assert "errors" not in res_data -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_generate_personal_access_token(frontend_session): +def test_generate_personal_access_token(auth_session): # Test success case json = { @@ -1214,7 +1170,7 @@ def test_generate_personal_access_token(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1239,7 +1195,7 @@ def test_generate_personal_access_token(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -1247,9 +1203,9 @@ def test_generate_personal_access_token(frontend_session): assert "errors" in res_data # Assert the request fails -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_native_user_endpoints(frontend_session): +def test_native_user_endpoints(auth_session): # Sign up tests + frontend_session = get_frontend_session() # Test getting the invite token get_invite_token_json = { @@ -1262,7 +1218,7 @@ def test_native_user_endpoints(frontend_session): } get_invite_token_response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_invite_token_json ) get_invite_token_response.raise_for_status() get_invite_token_res_data = get_invite_token_response.json() @@ -1283,14 +1239,14 @@ def test_native_user_endpoints(frontend_session): } sign_up_response = frontend_session.post( - f"{get_frontend_url()}/signUp", json=sign_up_json + f"{auth_session.frontend_url()}/signUp", json=sign_up_json ) assert sign_up_response assert "errors" not in sign_up_response # Creating the same user again fails same_user_sign_up_response = frontend_session.post( - f"{get_frontend_url()}/signUp", json=sign_up_json + f"{auth_session.frontend_url()}/signUp", json=sign_up_json ) assert not same_user_sign_up_response @@ -1303,7 +1259,7 @@ def test_native_user_endpoints(frontend_session): "inviteToken": "invite_token", } bad_sign_up_response = frontend_session.post( - f"{get_frontend_url()}/signUp", json=bad_sign_up_json + f"{auth_session.frontend_url()}/signUp", json=bad_sign_up_json ) assert not bad_sign_up_response @@ -1318,7 +1274,7 @@ def test_native_user_endpoints(frontend_session): username, password = get_admin_credentials() root_login_data = '{"username":"' + username + '", "password":"' + password + '"}' frontend_session.post( - f"{get_frontend_url()}/logIn", headers=headers, data=root_login_data + f"{auth_session.frontend_url()}/logIn", headers=headers, data=root_login_data ) # Test creating the password reset token @@ -1332,7 +1288,7 @@ def test_native_user_endpoints(frontend_session): } create_reset_token_response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", 
json=create_reset_token_json + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_reset_token_json ) create_reset_token_response.raise_for_status() create_reset_token_res_data = create_reset_token_response.json() @@ -1353,7 +1309,7 @@ def test_native_user_endpoints(frontend_session): } reset_credentials_response = frontend_session.post( - f"{get_frontend_url()}/resetNativeUserCredentials", json=reset_credentials_json + f"{auth_session.frontend_url()}/resetNativeUserCredentials", json=reset_credentials_json ) assert reset_credentials_response assert "errors" not in reset_credentials_response @@ -1365,7 +1321,7 @@ def test_native_user_endpoints(frontend_session): "resetToken": "reset_token", } bad_reset_credentials_response = frontend_session.post( - f"{get_frontend_url()}/resetNativeUserCredentials", + f"{auth_session.frontend_url()}/resetNativeUserCredentials", json=bad_user_reset_credentials_json, ) assert not bad_reset_credentials_response @@ -1377,7 +1333,7 @@ def test_native_user_endpoints(frontend_session): "resetToken": reset_token, } jaas_user_reset_credentials_response = frontend_session.post( - f"{get_frontend_url()}/resetNativeUserCredentials", + f"{auth_session.frontend_url()}/resetNativeUserCredentials", json=jaas_user_reset_credentials_json, ) assert not jaas_user_reset_credentials_response @@ -1387,7 +1343,7 @@ def test_native_user_endpoints(frontend_session): unauthenticated_session = requests.Session() unauthenticated_get_invite_token_response = unauthenticated_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_invite_token_json + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_invite_token_json ) assert ( unauthenticated_get_invite_token_response.status_code == HTTPStatus.UNAUTHORIZED @@ -1403,7 +1359,7 @@ def test_native_user_endpoints(frontend_session): } unauthenticated_create_reset_token_response = unauthenticated_session.post( - f"{get_frontend_url()}/api/v2/graphql", + f"{auth_session.frontend_url()}/api/v2/graphql", json=unauthenticated_create_reset_token_json, ) assert ( @@ -1419,11 +1375,11 @@ def test_native_user_endpoints(frontend_session): } frontend_session.post( - f"{get_frontend_url()}/logIn", headers=headers, data=root_login_data + f"{auth_session.frontend_url()}/logIn", headers=headers, data=root_login_data ) remove_user_response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json + f"{auth_session.frontend_url()}/api/v2/graphql", json=json ) remove_user_response.raise_for_status() assert "errors" not in remove_user_response diff --git a/smoke-test/test_rapid.py b/smoke-test/test_rapid.py index 98db3ee50ee40..c83ae5aec1884 100644 --- a/smoke-test/test_rapid.py +++ b/smoke-test/test_rapid.py @@ -2,11 +2,8 @@ import tenacity from tests.utils import ( - get_frontend_url, ingest_file_via_rest, - wait_for_healthcheck_util, get_sleep_info, - get_frontend_session, ) sleep_sec, sleep_times = get_sleep_info() @@ -15,21 +12,10 @@ bootstrap_small_2 = "test_resources/bootstrap_single2.json" -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.fixture(scope="session") -def frontend_session(wait_for_healthchecks): - yield get_frontend_session() - - @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_dataset_present_correctly(frontend_session): +def _ensure_dataset_present_correctly(auth_session): urn = "urn:li:dataset:(urn:li:dataPlatform:testPlatform,testDataset,PROD)" json = { "query": 
"""query getDataset($urn: String!) {\n @@ -64,7 +50,7 @@ def _ensure_dataset_present_correctly(frontend_session): }""", "variables": {"urn": urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) response.raise_for_status() res_data = response.json() @@ -75,7 +61,7 @@ def _ensure_dataset_present_correctly(frontend_session): assert len(res_data["data"]["dataset"]["outgoing"]["relationships"]) == 1 -def test_ingestion_via_rest_rapid(frontend_session, wait_for_healthchecks): - ingest_file_via_rest(bootstrap_small) - ingest_file_via_rest(bootstrap_small_2) - _ensure_dataset_present_correctly(frontend_session) +def test_ingestion_via_rest_rapid(auth_session): + ingest_file_via_rest(auth_session, bootstrap_small) + ingest_file_via_rest(auth_session, bootstrap_small_2) + _ensure_dataset_present_correctly(auth_session) diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 78ba68a840f0d..8615a55d7289f 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -23,14 +23,7 @@ PartitionTypeClass, ) -import requests_wrapper as requests -from tests.utils import ( - delete_urns_from_file, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, - wait_for_healthcheck_util, -) +from tests.utils import delete_urns_from_file, get_sleep_info, ingest_file_via_rest restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", @@ -225,8 +218,8 @@ def create_test_data(test_file): fileSink.close() -@pytest.fixture(scope="session") -def generate_test_data(tmp_path_factory): +@pytest.fixture(scope="module") +def generate_test_data(graph_client, tmp_path_factory): """Generates metadata events data and stores into a test file""" print("generating assertions test data") dir_name = tmp_path_factory.mktemp("test_dq_events") @@ -234,30 +227,18 @@ def generate_test_data(tmp_path_factory): create_test_data(test_file=str(file_name)) yield str(file_name) print("removing assertions test data") - delete_urns_from_file(str(file_name)) - - -@pytest.fixture(scope="session") -def wait_for_healthchecks(generate_test_data): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass + delete_urns_from_file(graph_client, str(file_name)) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_run_ingestion(generate_test_data): - ingest_file_via_rest(generate_test_data) +@pytest.fixture(scope="module") +def test_run_ingestion(auth_session, generate_test_data): + ingest_file_via_rest(auth_session, generate_test_data) @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _gms_get_latest_assertions_results_by_partition(): +def _gms_get_latest_assertions_results_by_partition(auth_session): urn = make_dataset_urn("postgres", "foo") # Query @@ -293,8 +274,8 @@ def _gms_get_latest_assertions_results_by_partition(): ], } ) - response = requests.post( - f"{get_gms_url()}/analytics?action=getTimeseriesStats", + response = auth_session.post( + f"{auth_session.gms_url()}/analytics?action=getTimeseriesStats", data=query, headers=restli_default_headers, ) @@ -320,17 +301,17 @@ def _gms_get_latest_assertions_results_by_partition(): ) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_latest_assertions_results_by_partition(): - _gms_get_latest_assertions_results_by_partition() +def test_gms_get_latest_assertions_results_by_partition( + auth_session, test_run_ingestion +): + _gms_get_latest_assertions_results_by_partition(auth_session) -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_assertions_on_dataset(): +def test_gms_get_assertions_on_dataset(auth_session, test_run_ingestion): """lists all assertion urns including those which may not have executed""" urn = make_dataset_urn("postgres", "foo") - response = requests.get( - f"{get_gms_url()}/relationships?direction=INCOMING&urn={urllib.parse.quote(urn)}&types=Asserts" + response = auth_session.get( + f"{auth_session.gms_url()}/relationships?direction=INCOMING&urn={urllib.parse.quote(urn)}&types=Asserts" ) response.raise_for_status() @@ -338,13 +319,12 @@ def test_gms_get_assertions_on_dataset(): assert len(data["relationships"]) == 1 -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_assertions_on_dataset_field(): +def test_gms_get_assertions_on_dataset_field(auth_session, test_run_ingestion): """lists all assertion urns including those which may not have executed""" dataset_urn = make_dataset_urn("postgres", "foo") field_urn = make_schema_field_urn(dataset_urn, "col1") - response = requests.get( - f"{get_gms_url()}/relationships?direction=INCOMING&urn={urllib.parse.quote(field_urn)}&types=Asserts" + response = auth_session.get( + f"{auth_session.gms_url()}/relationships?direction=INCOMING&urn={urllib.parse.quote(field_urn)}&types=Asserts" ) response.raise_for_status() @@ -352,11 +332,10 @@ def test_gms_get_assertions_on_dataset_field(): assert len(data["relationships"]) == 1 -@pytest.mark.dependency(depends=["test_healthchecks", "test_run_ingestion"]) -def test_gms_get_assertion_info(): +def test_gms_get_assertion_info(auth_session, test_run_ingestion): assertion_urn = "urn:li:assertion:2d3b06a6e77e1f24adc9860a05ea089b" - response = requests.get( - f"{get_gms_url()}/aspects/{urllib.parse.quote(assertion_urn)}\ + response = auth_session.get( + f"{auth_session.gms_url()}/aspects/{urllib.parse.quote(assertion_urn)}\ ?aspect=assertionInfo&version=0", headers=restli_default_headers, ) diff --git a/smoke-test/tests/assertions/custom_assertions_test.py b/smoke-test/tests/assertions/custom_assertions_test.py index 
509f1cf0f04e0..2533618518564 100644 --- a/smoke-test/tests/assertions/custom_assertions_test.py +++ b/smoke-test/tests/assertions/custom_assertions_test.py @@ -4,15 +4,11 @@ import pytest from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import ( - DatahubClientConfig, - DataHubGraph, - DataHubGraphConfig, -) +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import StatusClass from tests.consistency_utils import wait_for_writes_to_sync -from tests.utils import delete_urn, get_gms_url, wait_for_healthcheck_util +from tests.utils import delete_urn restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", @@ -21,29 +17,21 @@ TEST_DATASET_URN = make_dataset_urn(platform="postgres", name="foo_custom") -@pytest.fixture(scope="module", autouse=False) -def graph() -> DataHubGraph: - graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) - return graph - - -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() +@pytest.fixture(scope="module") +def test_data(graph_client): mcpw = MetadataChangeProposalWrapper( entityUrn=TEST_DATASET_URN, aspect=StatusClass(removed=False) ) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit(mcpw) + graph_client.emit(mcpw) yield - delete_urn(TEST_DATASET_URN) + delete_urn(graph_client, TEST_DATASET_URN) def test_create_update_delete_dataset_custom_assertion( - wait_for_healthchecks: Any, graph: DataHubGraph + test_data: Any, graph_client: DataHubGraph ) -> None: # Create custom assertion - resp = graph.upsert_custom_assertion( + resp = graph_client.upsert_custom_assertion( urn=None, entity_urn=TEST_DATASET_URN, type="My custom category", @@ -55,7 +43,7 @@ def test_create_update_delete_dataset_custom_assertion( assertion_urn = resp["urn"] # Update custom assertion - resp = graph.upsert_custom_assertion( + resp = graph_client.upsert_custom_assertion( urn=assertion_urn, entity_urn=TEST_DATASET_URN, type="My custom category", @@ -67,7 +55,7 @@ def test_create_update_delete_dataset_custom_assertion( wait_for_writes_to_sync() # Report custom assertion result for success - result_reported = graph.report_assertion_result( + result_reported = graph_client.report_assertion_result( urn=assertion_urn, timestamp_millis=0, type="SUCCESS", @@ -76,7 +64,7 @@ def test_create_update_delete_dataset_custom_assertion( assert result_reported # Report custom assertion result for error - result_reported = graph.report_assertion_result( + result_reported = graph_client.report_assertion_result( urn=assertion_urn, timestamp_millis=round(time.time() * 1000), type="ERROR", @@ -87,7 +75,7 @@ def test_create_update_delete_dataset_custom_assertion( assert result_reported # Report custom assertion result for failure - result_reported = graph.report_assertion_result( + result_reported = graph_client.report_assertion_result( urn=assertion_urn, timestamp_millis=round(time.time() * 1000), type="FAILURE", @@ -154,7 +142,7 @@ def test_create_update_delete_dataset_custom_assertion( } """ - dataset_assertions = graph.execute_graphql( + dataset_assertions = graph_client.execute_graphql( query=graphql_query_retrive_assertion, variables={"datasetUrn": TEST_DATASET_URN}, ) @@ -178,9 +166,9 @@ def test_create_update_delete_dataset_custom_assertion( assert assertions[0]["runEvents"]["failed"] == 1 assert assertions[0]["runEvents"]["runEvents"][0]["result"]["externalUrl"] - 
graph.delete_entity(assertion_urn, True) + graph_client.delete_entity(assertion_urn, True) - dataset_assertions = graph.execute_graphql( + dataset_assertions = graph_client.execute_graphql( query=graphql_query_retrive_assertion, variables={"datasetUrn": TEST_DATASET_URN}, ) diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index adeb6775a150d..bee0df5510c4a 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,6 +1,6 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +from tests.utils import delete_urns_from_file, ingest_file_via_rest TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -8,23 +8,16 @@ @pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +def ingest_cleanup_data(graph_client, auth_session, request): print("ingesting browse test data") - ingest_file_via_rest("tests/browse/data.json") + ingest_file_via_rest(auth_session, "tests/browse/data.json") yield print("removing browse test data") - delete_urns_from_file("tests/browse/data.json") + delete_urns_from_file(graph_client, "tests/browse/data.json") -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_get_browse_paths(frontend_session, ingest_cleanup_data): +def test_get_browse_paths(auth_session, ingest_cleanup_data): # Iterate through each browse path, starting with the root get_browse_paths_query = """query browse($input: BrowseInput!) 
{\n @@ -52,8 +45,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_browse_paths_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json ) response.raise_for_status() res_data = response.json() @@ -80,8 +73,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_browse_paths_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json ) response.raise_for_status() res_data = response.json() @@ -116,8 +109,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_browse_paths_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_browse_paths_json ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/cli/datahub_cli.py b/smoke-test/tests/cli/datahub_cli.py index a57cfd0b7b2be..099002195aa51 100644 --- a/smoke-test/tests/cli/datahub_cli.py +++ b/smoke-test/tests/cli/datahub_cli.py @@ -1,7 +1,6 @@ import json import pytest -from datahub.ingestion.graph.client import get_default_graph from datahub.metadata.schema_classes import ( BrowsePathsV2Class, EditableDatasetPropertiesClass, @@ -14,7 +13,7 @@ @pytest.fixture(autouse=True) -def test_setup(): +def test_setup(auth_session, graph_client): """Fixture to execute asserts before and after a test is run""" global ingested_dataset_run_id @@ -26,44 +25,41 @@ def test_setup(): env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - client = get_default_graph() - session = client._session - gms_host = client.config.server + gms_host = graph_client.config.server - assert client.get_aspect(dataset_urn, BrowsePathsV2Class) is None - assert client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is None + assert graph_client.get_aspect(dataset_urn, BrowsePathsV2Class) is None + assert graph_client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is None ingested_dataset_run_id = ingest_file_via_rest( - "tests/cli/cli_test_data.json" + auth_session, "tests/cli/cli_test_data.json" ).config.run_id print("Setup ingestion id: " + ingested_dataset_run_id) - assert client.get_aspect(dataset_urn, BrowsePathsV2Class) is not None + assert graph_client.get_aspect(dataset_urn, BrowsePathsV2Class) is not None yield # Clean up rollback_url = f"{gms_host}/runs?action=rollback" - session.post( + auth_session.post( rollback_url, data=json.dumps( {"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True} ), ) - session.post( + auth_session.post( rollback_url, data=json.dumps( {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True} ), ) - assert client.get_aspect(dataset_urn, BrowsePathsV2Class) is None - assert client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is None + assert graph_client.get_aspect(dataset_urn, BrowsePathsV2Class) is None + assert graph_client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is None -@pytest.mark.dependency() -def test_rollback_editable(): +def test_rollback_editable(auth_session, graph_client): global ingested_dataset_run_id global ingested_editable_run_id platform = "urn:li:dataPlatform:kafka" @@ -71,28 +67,28 @@ def test_rollback_editable(): 
env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - client = get_default_graph() - session = client._session - gms_host = client.config.server + gms_host = graph_client.config.server print("Ingested dataset id:", ingested_dataset_run_id) # Assert that second data ingestion worked - assert client.get_aspect(dataset_urn, BrowsePathsV2Class) is not None + assert graph_client.get_aspect(dataset_urn, BrowsePathsV2Class) is not None # Make editable change ingested_editable_run_id = ingest_file_via_rest( - "tests/cli/cli_editable_test_data.json" + auth_session, "tests/cli/cli_editable_test_data.json" ).config.run_id print("ingested editable id:", ingested_editable_run_id) # Assert that second data ingestion worked - assert client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is not None + assert ( + graph_client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is not None + ) # rollback ingestion 1 rollback_url = f"{gms_host}/runs?action=rollback" - session.post( + auth_session.post( rollback_url, data=json.dumps( {"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False} @@ -103,7 +99,9 @@ def test_rollback_editable(): wait_for_writes_to_sync() # EditableDatasetProperties should still be part of the entity that was soft deleted. - assert client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is not None + assert ( + graph_client.get_aspect(dataset_urn, EditableDatasetPropertiesClass) is not None + ) # But first ingestion aspects should not be present - assert client.get_aspect(dataset_urn, BrowsePathsV2Class) is None + assert graph_client.get_aspect(dataset_urn, BrowsePathsV2Class) is None diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 0af5572c7d1d9..e29492c0d3f9d 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -2,15 +2,10 @@ import pytest import tenacity -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import ( - delete_urns_from_file, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, -) +from tests.utils import delete_urns_from_file, get_sleep_info, ingest_file_via_rest sleep_sec, sleep_times = get_sleep_info() @@ -20,27 +15,19 @@ @pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +def ingest_cleanup_data(graph_client, auth_session, request): print("removing graph test data") - delete_urns_from_file("tests/cli/graph_data.json") + delete_urns_from_file(graph_client, "tests/cli/graph_data.json") print("ingesting graph test data") - ingest_file_via_rest("tests/cli/graph_data.json") + ingest_file_via_rest(auth_session, "tests/cli/graph_data.json") yield print("removing graph test data") - delete_urns_from_file("tests/cli/graph_data.json") - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass + delete_urns_from_file(graph_client, "tests/cli/graph_data.json") -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_get_aspect_v2(frontend_session, ingest_cleanup_data): - client: DataHubGraph = DataHubGraph(DatahubClientConfig(server=get_gms_url())) +def test_get_aspect_v2(graph_client, ingest_cleanup_data): urn = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-rollback,PROD)" - schema_metadata: Optional[SchemaMetadataClass] = client.get_aspect_v2( + schema_metadata: Optional[SchemaMetadataClass] = graph_client.get_aspect_v2( urn, aspect="schemaMetadata", aspect_type=SchemaMetadataClass ) @@ -57,7 +44,7 @@ def test_get_aspect_v2(frontend_session, ingest_cleanup_data): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_dataset_present_correctly(graph_client: DataHubGraph): +def _ensure_dataset_present_correctly(auth_session, graph_client: DataHubGraph): urn = "urn:li:dataset:(urn:li:dataPlatform:graph,graph-test,PROD)" json = { "query": """query getDataset($urn: String!) {\n @@ -92,7 +79,7 @@ def _ensure_dataset_present_correctly(graph_client: DataHubGraph): }""", "variables": {"urn": urn}, } - res_data = graph_client._post_generic("http://localhost:8080/api/graphql", json) + res_data = graph_client._post_generic(f"{auth_session.gms_url()}/api/graphql", json) assert res_data assert res_data["data"] @@ -101,11 +88,9 @@ def _ensure_dataset_present_correctly(graph_client: DataHubGraph): assert len(res_data["data"]["dataset"]["outgoing"]["relationships"]) == 3 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_graph_relationships(): - delete_urns_from_file(graph) - delete_urns_from_file(graph_2) - ingest_file_via_rest(graph) - ingest_file_via_rest(graph_2) - graph_client: DataHubGraph = DataHubGraph(DatahubClientConfig(server=get_gms_url())) - _ensure_dataset_present_correctly(graph_client) +def test_graph_relationships(graph_client, auth_session): + delete_urns_from_file(graph_client, graph) + delete_urns_from_file(graph_client, graph_2) + ingest_file_via_rest(auth_session, graph) + ingest_file_via_rest(auth_session, graph_2) + _ensure_dataset_present_correctly(auth_session, graph_client) diff --git a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 438c063f11fa8..8c526e32c7053 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -31,7 +31,7 @@ def sync_elastic() -> None: wait_for_writes_to_sync() -def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None: +def datahub_put_profile(auth_session, dataset_profile: DatasetProfileClass) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".json") as aspect_file: aspect_text: str = json.dumps(pre_json_transform(dataset_profile.to_obj())) aspect_file.write(aspect_text) @@ -45,17 +45,32 @@ def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None: "-d", aspect_file.name, ] - put_result = runner.invoke(datahub, put_args) + put_result = runner.invoke( + datahub, + put_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert put_result.exit_code == 0 def datahub_get_and_verify_profile( + auth_session, expected_profile: Optional[DatasetProfileClass], ) -> None: # Wait for writes to stabilize in elastic sync_elastic() get_args: List[str] = ["get", "--urn", test_dataset_urn, "-a", test_aspect_name] - 
get_result: Result = runner.invoke(datahub, get_args) + get_result: Result = runner.invoke( + datahub, + get_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) if expected_profile is None: assert get_result.exit_code != 0 @@ -76,20 +91,28 @@ def datahub_get_and_verify_profile( assert profile_from_get == expected_profile -def datahub_delete(params: List[str]) -> None: +def datahub_delete(auth_session, params: List[str]) -> None: sync_elastic() args: List[str] = ["delete"] args.extend(params) args.append("--hard") - delete_result: Result = runner.invoke(datahub, args, input="y\ny\n") + delete_result: Result = runner.invoke( + datahub, + args, + input="y\ny\n", + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) logger.info(delete_result.stdout) if delete_result.stderr: logger.error(delete_result.stderr) assert delete_result.exit_code == 0 -def test_timeseries_delete(wait_for_healthchecks: Any) -> None: +def test_timeseries_delete(auth_session: Any) -> None: num_test_profiles: int = 10 verification_batch_size: int = int(num_test_profiles / 2) num_latest_profiles_to_delete = 2 @@ -100,11 +123,11 @@ def test_timeseries_delete(wait_for_healthchecks: Any) -> None: # and validate using get. for i, dataset_profile in enumerate(gen_dataset_profiles(num_test_profiles)): # Use put command to ingest the aspect value. - datahub_put_profile(dataset_profile) + datahub_put_profile(auth_session, dataset_profile) # Validate against all ingested values once every verification_batch_size to reduce overall test time. Since we # are ingesting the aspects in the ascending order of timestampMillis, get should return the one just put. if (i % verification_batch_size) == 0: - datahub_get_and_verify_profile(dataset_profile) + datahub_get_and_verify_profile(auth_session, dataset_profile) # Init the params for time-range based deletion. if i == (num_test_profiles - num_latest_profiles_to_delete - 1): @@ -119,6 +142,7 @@ def test_timeseries_delete(wait_for_healthchecks: Any) -> None: ) # 2. Verify time-range based deletion. datahub_delete( + auth_session, [ "--urn", test_dataset_urn, @@ -131,8 +155,8 @@ def test_timeseries_delete(wait_for_healthchecks: Any) -> None: ], ) assert expected_profile_after_latest_deletion is not None - datahub_get_and_verify_profile(expected_profile_after_latest_deletion) + datahub_get_and_verify_profile(auth_session, expected_profile_after_latest_deletion) # 3. Delete everything via the delete command & validate that we don't get any profiles back. 
- datahub_delete(["-p", "test_platform"]) - datahub_get_and_verify_profile(None) + datahub_delete(auth_session, ["-p", "test_platform"]) + datahub_get_and_verify_profile(auth_session, None) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index aa7c90cc6f988..4d148041e1a0b 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional import datahub.emitter.mce_builder as builder from click.testing import CliRunner, Result @@ -16,14 +16,22 @@ def sync_elastic() -> None: wait_for_writes_to_sync() -def datahub_rollback(run_id: str) -> None: +def datahub_rollback(auth_session, run_id: str) -> None: sync_elastic() rollback_args: List[str] = ["ingest", "rollback", "--run-id", run_id, "-f"] - rollback_result: Result = runner.invoke(datahub, rollback_args) + rollback_result: Result = runner.invoke( + datahub, + rollback_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert rollback_result.exit_code == 0 def datahub_get_and_verify_profile( + auth_session, urn: str, aspect_name: str, expected_profile: Optional[DatasetProfileClass], @@ -31,7 +39,14 @@ def datahub_get_and_verify_profile( # Wait for writes to stabilize in elastic sync_elastic() get_args: List[str] = ["get", "--urn", urn, "-a", aspect_name] - get_result: Result = runner.invoke(datahub, get_args) + get_result: Result = runner.invoke( + datahub, + get_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert get_result.exit_code == 0 get_result_output_obj: Dict = json.loads(get_result.stdout) if expected_profile is None: @@ -44,9 +59,9 @@ def datahub_get_and_verify_profile( assert profile_from_get == expected_profile -def test_timeseries_rollback(wait_for_healthchecks: Any) -> None: +def test_timeseries_rollback(auth_session) -> None: pipeline = ingest_file_via_rest( - "tests/cli/ingest_cmd/test_timeseries_rollback.json" + auth_session, "tests/cli/ingest_cmd/test_timeseries_rollback.json" ) test_aspect_name: str = "datasetProfile" test_dataset_urn: str = builder.make_dataset_urn( @@ -54,5 +69,7 @@ def test_timeseries_rollback(wait_for_healthchecks: Any) -> None: "rollback_test_dataset", "TEST", ) - datahub_rollback(pipeline.config.run_id) - datahub_get_and_verify_profile(test_dataset_urn, test_aspect_name, None) + datahub_rollback(auth_session, pipeline.config.run_id) + datahub_get_and_verify_profile( + auth_session, test_dataset_urn, test_aspect_name, None + ) diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 6be23b2567f67..502848a722902 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -7,7 +7,7 @@ from click.testing import CliRunner, Result from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.ingestion.graph.client import DataHubGraph from tests.utils import wait_for_writes_to_sync @@ -18,7 +18,7 @@ def sync_elastic() -> None: wait_for_writes_to_sync() -def datahub_upsert_group(group: CorpGroup) -> None: +def 
datahub_upsert_group(auth_session: Any, group: CorpGroup) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".yaml") as group_file: yaml.dump(group.dict(), group_file) group_file.flush() @@ -28,7 +28,14 @@ def datahub_upsert_group(group: CorpGroup) -> None: "-f", group_file.name, ] - group_create_result = runner.invoke(datahub, upsert_args) + group_create_result = runner.invoke( + datahub, + upsert_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert group_create_result.exit_code == 0 @@ -47,9 +54,16 @@ def gen_datahub_groups(num_groups: int) -> Iterable[CorpGroup]: yield group -def datahub_get_group(group_urn: str): +def datahub_get_group(auth_session, group_urn: str): get_args: List[str] = ["get", "--urn", group_urn] - get_result: Result = runner.invoke(datahub, get_args) + get_result: Result = runner.invoke( + datahub, + get_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert get_result.exit_code == 0 try: get_result_output_obj: Dict = json.loads(get_result.stdout) @@ -59,9 +73,8 @@ def datahub_get_group(group_urn: str): raise e -def get_group_ownership(user_urn: str) -> List[str]: - graph = get_default_graph() - entities = graph.get_related_entities( +def get_group_ownership(graph_client: DataHubGraph, user_urn: str) -> List[str]: + entities = graph_client.get_related_entities( entity_urn=user_urn, relationship_types=["OwnedBy"], direction=DataHubGraph.RelationshipDirection.INCOMING, @@ -69,9 +82,8 @@ def get_group_ownership(user_urn: str) -> List[str]: return [entity.urn for entity in entities] -def get_group_membership(user_urn: str) -> List[str]: - graph = get_default_graph() - entities = graph.get_related_entities( +def get_group_membership(graph_client: DataHubGraph, user_urn: str) -> List[str]: + entities = graph_client.get_related_entities( entity_urn=user_urn, relationship_types=["IsMemberOfGroup"], direction=DataHubGraph.RelationshipDirection.OUTGOING, @@ -79,11 +91,11 @@ def get_group_membership(user_urn: str) -> List[str]: return [entity.urn for entity in entities] -def test_group_upsert(wait_for_healthchecks: Any) -> None: +def test_group_upsert(auth_session: Any, graph_client: DataHubGraph) -> None: num_groups: int = 10 for i, datahub_group in enumerate(gen_datahub_groups(num_groups)): - datahub_upsert_group(datahub_group) - group_dict = datahub_get_group(f"urn:li:corpGroup:group_{i}") + datahub_upsert_group(auth_session, datahub_group) + group_dict = datahub_get_group(auth_session, f"urn:li:corpGroup:group_{i}") assert group_dict == { "corpGroupEditableInfo": { "description": f"The Group {i}", @@ -112,8 +124,8 @@ def test_group_upsert(wait_for_healthchecks: Any) -> None: } sync_elastic() - groups_owned = get_group_ownership("urn:li:corpuser:user1") - groups_partof = get_group_membership("urn:li:corpuser:user2") + groups_owned = get_group_ownership(graph_client, "urn:li:corpuser:user1") + groups_partof = get_group_membership(graph_client, "urn:li:corpuser:user2") all_groups = sorted([f"urn:li:corpGroup:group_{i}" for i in range(0, num_groups)]) diff --git a/smoke-test/tests/cli/user_groups_cmd/test_user_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_user_cmd.py index 5d16b896a5404..3d0902451ebfc 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_user_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_user_cmd.py @@ -11,7 +11,7 @@ runner = CliRunner(mix_stderr=False) -def datahub_upsert_user(user: CorpUser) -> None: 
+def datahub_upsert_user(auth_session, user: CorpUser) -> None: with tempfile.NamedTemporaryFile("w+t", suffix=".yaml") as user_file: yaml.dump(user.dict(), user_file) user_file.flush() @@ -21,7 +21,14 @@ def datahub_upsert_user(user: CorpUser) -> None: "-f", user_file.name, ] - user_create_result = runner.invoke(datahub, upsert_args) + user_create_result = runner.invoke( + datahub, + upsert_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert user_create_result.exit_code == 0 @@ -43,9 +50,16 @@ def gen_datahub_users(num_users: int) -> Iterable[CorpUser]: yield user -def datahub_get_user(user_urn: str): +def datahub_get_user(auth_session: Any, user_urn: str): get_args: List[str] = ["get", "--urn", user_urn] - get_result: Result = runner.invoke(datahub, get_args) + get_result: Result = runner.invoke( + datahub, + get_args, + env={ + "DATAHUB_GMS_URL": auth_session.gms_url(), + "DATAHUB_GMS_TOKEN": auth_session.gms_token(), + }, + ) assert get_result.exit_code == 0 try: get_result_output_obj: Dict = json.loads(get_result.stdout) @@ -55,11 +69,11 @@ def datahub_get_user(user_urn: str): raise e -def test_user_upsert(wait_for_healthchecks: Any) -> None: +def test_user_upsert(auth_session: Any) -> None: num_user_profiles: int = 10 for i, datahub_user in enumerate(gen_datahub_users(num_user_profiles)): - datahub_upsert_user(datahub_user) - user_dict = datahub_get_user(f"urn:li:corpuser:user_{i}") + datahub_upsert_user(auth_session, datahub_user) + user_dict = datahub_get_user(auth_session, f"urn:li:corpuser:user_{i}") assert user_dict == { "corpUserEditableInfo": { "aboutMe": f"The User {i}", diff --git a/smoke-test/tests/conftest.py b/smoke-test/tests/conftest.py deleted file mode 100644 index 57b92a2db1c19..0000000000000 --- a/smoke-test/tests/conftest.py +++ /dev/null @@ -1,32 +0,0 @@ -import os - -import pytest - -from tests.test_result_msg import send_message -from tests.utils import get_frontend_session, wait_for_healthcheck_util - -# Disable telemetry -os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" - - -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.fixture(scope="session") -def frontend_session(wait_for_healthchecks): - yield get_frontend_session() - - -# TODO: Determine whether we need this or not. -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - -def pytest_sessionfinish(session, exitstatus): - """whole test run finishes.""" - send_message(exitstatus) diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 4997102702e57..3d1217081fa6a 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,25 +1,19 @@ import pytest -from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest +from tests.utils import delete_urns_from_file, ingest_file_via_rest @pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting containers test data") - ingest_file_via_rest("tests/containers/data.json") + ingest_file_via_rest(auth_session, "tests/containers/data.json") yield print("removing containers test data") - delete_urns_from_file("tests/containers/data.json") + delete_urns_from_file(graph_client, "tests/containers/data.json") @pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_get_full_container(frontend_session, ingest_cleanup_data): +def test_get_full_container(auth_session, ingest_cleanup_data): container_urn = "urn:li:container:SCHEMA" container_name = "datahub_schema" container_description = "The DataHub schema" @@ -97,8 +91,8 @@ def test_get_full_container(frontend_session, ingest_cleanup_data): "variables": {"urn": container_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_container_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_container_json ) response.raise_for_status() res_data = response.json() @@ -124,8 +118,8 @@ def test_get_full_container(frontend_session, ingest_cleanup_data): assert container["glossaryTerms"] is None -@pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) -def test_get_parent_container(frontend_session, ingest_cleanup_data): +@pytest.mark.dependency(depends=["test_get_full_container"]) +def test_get_parent_container(auth_session): dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get count of existing secrets @@ -144,8 +138,8 @@ def test_get_parent_container(frontend_session, ingest_cleanup_data): "variables": {"urn": dataset_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_dataset_json ) response.raise_for_status() res_data = response.json() @@ -159,8 +153,8 @@ def test_get_parent_container(frontend_session, ingest_cleanup_data): assert dataset["container"]["properties"]["name"] == "datahub_schema" -@pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) -def test_update_container(frontend_session, ingest_cleanup_data): +@pytest.mark.dependency(depends=["test_get_full_container"]) +def test_update_container(auth_session): container_urn = "urn:li:container:SCHEMA" new_tag = "urn:li:tag:Test" @@ -177,8 +171,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_tag_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", 
json=add_tag_json ) response.raise_for_status() res_data = response.json() @@ -201,8 +195,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_term_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_term_json ) response.raise_for_status() res_data = response.json() @@ -227,8 +221,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_owner_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_owner_json ) response.raise_for_status() res_data = response.json() @@ -252,8 +246,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_link_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_link_json ) response.raise_for_status() res_data = response.json() @@ -276,8 +270,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_description_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_description_json ) response.raise_for_status() res_data = response.json() @@ -327,8 +321,8 @@ def test_update_container(frontend_session, ingest_cleanup_data): "variables": {"urn": container_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_container_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_container_json ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 4124ced999446..0d824a96810d0 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -117,7 +117,7 @@ def print_now(): print(f"current time is {datetime.datetime.now(datetime.timezone.utc)}") -def ingest_data(): +def ingest_data(auth_session, graph_client): print_now() print("creating onboarding data file") create_datahub_step_state_aspects( @@ -128,26 +128,38 @@ def ingest_data(): print_now() print("ingesting test data") - ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}") - ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}") - ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_PATCH_DATA_FILENAME}") - ingest_file_via_rest(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}") - ingest_time_lineage() + ingest_file_via_rest(auth_session, f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}") + ingest_file_via_rest( + auth_session, f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}" + ) + ingest_file_via_rest( + auth_session, f"{CYPRESS_TEST_DATA_DIR}/{TEST_PATCH_DATA_FILENAME}" + ) + ingest_file_via_rest( + auth_session, f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}" + ) + ingest_time_lineage(graph_client) print_now() print("completed ingesting test data") @pytest.fixture(scope="module", autouse=True) -def ingest_cleanup_data(): - ingest_data() +def ingest_cleanup_data(auth_session, graph_client): + ingest_data(auth_session, graph_client) yield print_now() print("removing test data") - 
delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}") - delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}") - delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_PATCH_DATA_FILENAME}") - delete_urns_from_file(f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}") - delete_urns(get_time_lineage_urns()) + delete_urns_from_file(graph_client, f"{CYPRESS_TEST_DATA_DIR}/{TEST_DATA_FILENAME}") + delete_urns_from_file( + graph_client, f"{CYPRESS_TEST_DATA_DIR}/{TEST_DBT_DATA_FILENAME}" + ) + delete_urns_from_file( + graph_client, f"{CYPRESS_TEST_DATA_DIR}/{TEST_PATCH_DATA_FILENAME}" + ) + delete_urns_from_file( + graph_client, f"{CYPRESS_TEST_DATA_DIR}/{TEST_ONBOARDING_DATA_FILENAME}" + ) + delete_urns(graph_client, get_time_lineage_urns()) print_now() print("deleting onboarding data file") @@ -163,7 +175,7 @@ def _get_spec_map(items: Set[str]) -> str: return ",".join([f"**/{item}/*.js" for item in items]) -def test_run_cypress(frontend_session, wait_for_healthchecks): +def test_run_cypress(auth_session): # Run with --record option only if CYPRESS_RECORD_KEY is non-empty record_key = os.getenv("CYPRESS_RECORD_KEY") tag_arg = "" diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index 88a1c42ddca03..c26b443dd34a7 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -10,7 +10,7 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.sink.file import FileSink, FileSinkConfig from datahub.metadata.schema_classes import ( DataProductPropertiesClass, @@ -22,7 +22,6 @@ from tests.utils import ( delete_urns_from_file, - get_gms_url, get_sleep_info, ingest_file_via_rest, wait_for_writes_to_sync, @@ -86,36 +85,30 @@ def create_test_data(filename: str): @pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): new_file, filename = tempfile.mkstemp(suffix=".json") try: create_test_data(filename) print("ingesting data products test data") - ingest_file_via_rest(filename) + ingest_file_via_rest(auth_session, filename) yield print("removing data products test data") - delete_urns_from_file(filename) + delete_urns_from_file(graph_client, filename) wait_for_writes_to_sync() finally: os.remove(filename) -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - def get_gql_query(filename: str) -> str: with open(filename) as fp: return fp.read() def validate_listing( - graph: DataHubGraph, data_product_urn: str, dataset_urns: List[str] + graph_client: DataHubGraph, data_product_urn: str, dataset_urns: List[str] ) -> None: # Validate listing - result = graph.execute_graphql( + result = graph_client.execute_graphql( get_gql_query("tests/dataproduct/queries/list_dataproduct_assets.graphql"), {"urn": data_product_urn, "input": {"query": "*", "start": 0, "count": 20}}, ) @@ -128,12 +121,12 @@ def validate_listing( def validate_relationships( - graph: DataHubGraph, data_product_urn: str, dataset_urns: List[str] + graph_client: DataHubGraph, data_product_urn: str, dataset_urns: List[str] ) -> None: # Validate relationships urn_match = {k: False for k in dataset_urns} for dataset_urn in dataset_urns: - for e in graph.get_related_entities( + for e in graph_client.get_related_entities( dataset_urn, relationship_types=["DataProductContains"], direction=DataHubGraph.RelationshipDirection.INCOMING, @@ -147,7 +140,7 @@ def validate_relationships( ), "All dataset urns should have a DataProductContains relationship to the data product" dataset_urns_matched = set() - for e in graph.get_related_entities( + for e in graph_client.get_related_entities( data_product_urn, relationship_types=["DataProductContains"], direction=DataHubGraph.RelationshipDirection.OUTGOING, @@ -162,11 +155,10 @@ def validate_relationships( @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_data_product(ingest_cleanup_data): +def test_create_data_product(graph_client, ingest_cleanup_data): domain_urn = Urn("domain", [datahub_guid({"name": "Marketing"})]) - graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) - result = graph.execute_graphql( + + result = graph_client.execute_graphql( get_gql_query("tests/dataproduct/queries/add_dataproduct.graphql"), { "domainUrn": str(domain_urn), @@ -177,22 +169,26 @@ def test_create_data_product(ingest_cleanup_data): assert "createDataProduct" in result data_product_urn = result["createDataProduct"]["urn"] # Data Product Properties - data_product_props = graph.get_aspect(data_product_urn, DataProductPropertiesClass) + data_product_props = graph_client.get_aspect( + data_product_urn, DataProductPropertiesClass + ) assert data_product_props is not None assert data_product_props.description == "Test Description" assert data_product_props.name == "Test Data Product" # Domain assignment - domains = graph.get_aspect(data_product_urn, DomainsClass) + domains = graph_client.get_aspect(data_product_urn, DomainsClass) assert domains and domains.domains[0] == str(domain_urn) # Add assets - result = graph.execute_graphql( + result = graph_client.execute_graphql( get_gql_query("tests/dataproduct/queries/setassets_dataproduct.graphql"), {"dataProductUrn": data_product_urn, "resourceUrns": dataset_urns}, ) assert "batchSetDataProduct" in result assert result["batchSetDataProduct"] is True - data_product_props = graph.get_aspect(data_product_urn, DataProductPropertiesClass) + data_product_props = graph_client.get_aspect( + data_product_urn, DataProductPropertiesClass + ) assert data_product_props is not None assert data_product_props.assets is not None assert data_product_props.description == "Test Description" @@ -204,11 +200,11 @@ def test_create_data_product(ingest_cleanup_data): wait_for_writes_to_sync() 
- validate_listing(graph, data_product_urn, dataset_urns) - validate_relationships(graph, data_product_urn, dataset_urns) + validate_listing(graph_client, data_product_urn, dataset_urns) + validate_relationships(graph_client, data_product_urn, dataset_urns) # Update name and description - result = graph.execute_graphql( + result = graph_client.execute_graphql( get_gql_query("tests/dataproduct/queries/update_dataproduct.graphql"), { "urn": data_product_urn, @@ -219,28 +215,30 @@ def test_create_data_product(ingest_cleanup_data): wait_for_writes_to_sync() # Data Product Properties - data_product_props = graph.get_aspect(data_product_urn, DataProductPropertiesClass) + data_product_props = graph_client.get_aspect( + data_product_urn, DataProductPropertiesClass + ) assert data_product_props is not None assert data_product_props.description == "New Description" assert data_product_props.name == "New Test Data Product" assert data_product_props.assets is not None assert len(data_product_props.assets) == len(dataset_urns) - validate_listing(graph, data_product_urn, dataset_urns) - validate_relationships(graph, data_product_urn, dataset_urns) + validate_listing(graph_client, data_product_urn, dataset_urns) + validate_relationships(graph_client, data_product_urn, dataset_urns) # delete dataproduct - result = graph.execute_graphql( + result = graph_client.execute_graphql( get_gql_query("tests/dataproduct/queries/delete_dataproduct.graphql"), {"urn": data_product_urn}, ) wait_for_writes_to_sync() - assert graph.exists(data_product_urn) is False + assert graph_client.exists(data_product_urn) is False # Validate relationships are removed urn_match = {k: False for k in dataset_urns} for dataset_urn in dataset_urns: - for e in graph.get_related_entities( + for e in graph_client.get_related_entities( dataset_urn, relationship_types=["DataProductContains"], direction=DataHubGraph.RelationshipDirection.INCOMING, diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index 3a999224fd3e6..a13fc883b4055 100644 --- a/smoke-test/tests/delete/delete_test.py +++ b/smoke-test/tests/delete/delete_test.py @@ -6,9 +6,7 @@ from tests.utils import ( delete_urns_from_file, - get_datahub_graph, ingest_file_via_rest, - wait_for_healthcheck_util, wait_for_writes_to_sync, ) @@ -16,20 +14,8 @@ os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - @pytest.fixture(autouse=False) -def test_setup(): +def test_setup(auth_session, graph_client): """Fixture to execute asserts before and after a test is run""" platform = "urn:li:dataPlatform:kafka" @@ -38,9 +24,8 @@ def test_setup(): env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - client = get_datahub_graph() - session = client._session - gms_host = client.config.server + session = graph_client._session + gms_host = graph_client.config.server try: assert "institutionalMemory" not in get_aspects_for_entity( @@ -58,11 +43,11 @@ def test_setup(): typed=False, ) except Exception as e: - delete_urns_from_file("tests/delete/cli_test_data.json") + delete_urns_from_file(graph_client, "tests/delete/cli_test_data.json") raise e ingested_dataset_run_id = ingest_file_via_rest( - "tests/delete/cli_test_data.json" + auth_session, "tests/delete/cli_test_data.json" ).config.run_id assert "institutionalMemory" in get_aspects_for_entity( @@ -101,7 +86,7 @@ def test_setup(): @pytest.mark.dependency() -def test_delete_reference(test_setup, depends=["test_healthchecks"]): +def test_delete_reference(graph_client, test_setup): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-delete" @@ -109,10 +94,8 @@ def test_delete_reference(test_setup, depends=["test_healthchecks"]): dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" tag_urn = "urn:li:tag:NeedsDocs" - graph = get_datahub_graph() - # Validate that the ingested tag is being referenced by the dataset - references_count, related_aspects = graph.delete_references_to_urn( + references_count, related_aspects = graph_client.delete_references_to_urn( tag_urn, dry_run=True ) print("reference count: " + str(references_count)) @@ -121,12 +104,12 @@ def test_delete_reference(test_setup, depends=["test_healthchecks"]): assert related_aspects[0]["entity"] == dataset_urn # Delete references to the tag - graph.delete_references_to_urn(tag_urn, dry_run=False) + graph_client.delete_references_to_urn(tag_urn, dry_run=False) wait_for_writes_to_sync() # Validate that references no longer exist - references_count, related_aspects = graph.delete_references_to_urn( + references_count, related_aspects = graph_client.delete_references_to_urn( tag_urn, dry_run=True ) assert references_count == 0 diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index ae3890aeda956..0377ddd05c389 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,30 +1,19 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_root_urn, - ingest_file_via_rest, -) +from tests.utils import delete_urns_from_file, get_root_urn, ingest_file_via_rest @pytest.fixture(scope="module", autouse=True) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting deprecation test data") - ingest_file_via_rest("tests/deprecation/data.json") + ingest_file_via_rest(auth_session, "tests/deprecation/data.json") yield print("removing deprecation test data") - delete_urns_from_file("tests/deprecation/data.json") + delete_urns_from_file(graph_client, "tests/deprecation/data.json") @pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_update_deprecation_all_fields(frontend_session): +def test_update_deprecation_all_fields(auth_session): dataset_urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" ) @@ -44,8 +33,8 @@ def test_update_deprecation_all_fields(frontend_session): } # Fetch tags - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -69,8 +58,8 @@ def test_update_deprecation_all_fields(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_deprecation_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_deprecation_json ) response.raise_for_status() res_data = response.json() @@ -80,8 +69,8 @@ def test_update_deprecation_all_fields(frontend_session): assert res_data["data"]["updateDeprecation"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -97,10 +86,8 @@ def test_update_deprecation_all_fields(frontend_session): } -@pytest.mark.dependency( - depends=["test_healthchecks", "test_update_deprecation_all_fields"] -) -def test_update_deprecation_partial_fields(frontend_session, ingest_cleanup_data): +@pytest.mark.dependency(depends=["test_update_deprecation_all_fields"]) +def test_update_deprecation_partial_fields(auth_session, ingest_cleanup_data): dataset_urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" ) @@ -112,8 +99,8 @@ def test_update_deprecation_partial_fields(frontend_session, ingest_cleanup_data "variables": {"input": {"urn": dataset_urn, "deprecated": False}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_deprecation_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_deprecation_json ) response.raise_for_status() res_data = response.json() @@ -137,8 +124,8 @@ def test_update_deprecation_partial_fields(frontend_session, ingest_cleanup_data "variables": {"urn": dataset_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index 1d83b032d7a8f..921f6948b2040 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,39 +1,27 @@ import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - get_sleep_info, - ingest_file_via_rest, -) +from tests.utils import delete_urns_from_file, get_sleep_info, ingest_file_via_rest sleep_sec, sleep_times = get_sleep_info() @pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting domains test data") - ingest_file_via_rest("tests/domains/data.json") + ingest_file_via_rest(auth_session, 
"tests/domains/data.json") yield print("removing domains test data") - delete_urns_from_file("tests/domains/data.json") - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass + delete_urns_from_file(graph_client, "tests/domains/data.json") @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_more_domains(frontend_session, list_domains_json, before_count): +def _ensure_more_domains(auth_session, list_domains_json, before_count): # Get new count of Domains - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_domains_json ) response.raise_for_status() res_data = response.json() @@ -49,11 +37,12 @@ def _ensure_more_domains(frontend_session, list_domains_json, before_count): assert after_count == before_count + 1 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_list_get_domain(frontend_session): +@pytest.mark.dependency() +def test_create_list_get_domain(auth_session): # Setup: Delete the domain (if exists) - response = frontend_session.post( - f"{get_gms_url()}/entities?action=delete", json={"urn": "urn:li:domain:test id"} + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=delete", + json={"urn": "urn:li:domain:test id"}, ) # Get count of existing secrets @@ -74,8 +63,8 @@ def test_create_list_get_domain(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_domains_json ) response.raise_for_status() res_data = response.json() @@ -107,8 +96,8 @@ def test_create_list_get_domain(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=create_domain_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_domain_json ) response.raise_for_status() res_data = response.json() @@ -121,7 +110,7 @@ def test_create_list_get_domain(frontend_session): domain_urn = res_data["data"]["createDomain"] _ensure_more_domains( - frontend_session=frontend_session, + auth_session=auth_session, list_domains_json=list_domains_json, before_count=before_count, ) @@ -141,8 +130,8 @@ def test_create_list_get_domain(frontend_session): "variables": {"urn": domain_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_domain_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_domain_json ) response.raise_for_status() res_data = response.json() @@ -161,15 +150,15 @@ def test_create_list_get_domain(frontend_session): delete_json = {"urn": domain_urn} # Cleanup: Delete the domain - response = frontend_session.post( - f"{get_gms_url()}/entities?action=delete", json=delete_json + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=delete", json=delete_json ) response.raise_for_status() -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_get_domain"]) -def test_set_unset_domain(frontend_session, ingest_cleanup_data): +@pytest.mark.dependency(depends=["test_create_list_get_domain"]) +def test_set_unset_domain(auth_session, ingest_cleanup_data): # Set 
and Unset a Domain for a dataset. Note that this doesn't test for adding domains to charts, dashboards, & jobs. dataset_urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" @@ -183,8 +172,8 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): "variables": {"entityUrn": dataset_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=unset_domain_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=unset_domain_json ) response.raise_for_status() res_data = response.json() @@ -201,8 +190,8 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): "variables": {"entityUrn": dataset_urn, "domainUrn": domain_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=set_domain_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=set_domain_json ) response.raise_for_status() res_data = response.json() @@ -230,8 +219,8 @@ def test_set_unset_domain(frontend_session, ingest_cleanup_data): "variables": {"urn": dataset_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_dataset_json ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/incidents/incidents_test.py b/smoke-test/tests/incidents/incidents_test.py index 0f75377ce6576..864593c2e505f 100644 --- a/smoke-test/tests/incidents/incidents_test.py +++ b/smoke-test/tests/incidents/incidents_test.py @@ -2,27 +2,16 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_gms_url, - ingest_file_via_rest, -) +from tests.utils import delete_urns_from_file, ingest_file_via_rest @pytest.fixture(scope="module", autouse=True) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting incidents test data") - ingest_file_via_rest("tests/incidents/data.json") + ingest_file_via_rest(auth_session, "tests/incidents/data.json") yield print("removing incidents test data") - delete_urns_from_file("tests/incidents/data.json") - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality.
- pass + delete_urns_from_file(graph_client, "tests/incidents/data.json") TEST_DATASET_URN = ( @@ -31,8 +20,8 @@ def test_healthchecks(wait_for_healthchecks): TEST_INCIDENT_URN = "urn:li:incident:test" -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_list_dataset_incidents(frontend_session): +@pytest.mark.dependency() +def test_list_dataset_incidents(auth_session): # Sleep for eventual consistency (not ideal) time.sleep(2) @@ -82,8 +71,9 @@ def test_list_dataset_incidents(frontend_session): "variables": {"urn": TEST_DATASET_URN}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_dataset_incidents_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", + json=list_dataset_incidents_json, ) response.raise_for_status() res_data = response.json() @@ -118,14 +108,8 @@ def test_list_dataset_incidents(frontend_session): } -@pytest.mark.dependency( - depends=[ - "test_healthchecks", - "test_list_dataset_incidents", - "test_search_all_incidents", - ] -) -def test_raise_resolve_incident(frontend_session): +@pytest.mark.dependency(depends=["test_list_dataset_incidents"]) +def test_raise_resolve_incident(auth_session): # Raise new incident raise_incident_json = { "query": """mutation raiseIncident($input: RaiseIncidentInput!) {\n @@ -142,8 +126,8 @@ def test_raise_resolve_incident(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=raise_incident_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=raise_incident_json ) response.raise_for_status() res_data = response.json() @@ -169,8 +153,8 @@ def test_raise_resolve_incident(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_incident_status + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_incident_status ) response.raise_for_status() res_data = response.json() @@ -220,8 +204,9 @@ def test_raise_resolve_incident(frontend_session): "variables": {"urn": TEST_DATASET_URN}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_dataset_incidents_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", + json=list_dataset_incidents_json, ) response.raise_for_status() res_data = response.json() @@ -246,8 +231,8 @@ def test_raise_resolve_incident(frontend_session): delete_json = {"urn": new_incident_urn} # Cleanup: Delete the incident - response = frontend_session.post( - f"{get_gms_url()}/entities?action=delete", json=delete_json + response = auth_session.post( + f"{auth_session.gms_url()}/entities?action=delete", json=delete_json ) response.raise_for_status() diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py index c9895568a7140..8757741d1cb23 100644 --- a/smoke-test/tests/lineage/test_lineage.py +++ b/smoke-test/tests/lineage/test_lineage.py @@ -7,7 +7,7 @@ import networkx as nx import pytest from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( AuditStampClass, ChangeAuditStampsClass, @@ -76,9 +76,9 @@ def emit_mcp(self, mcp: MetadataChangeProposalWrapper) -> None: INFINITE_HOPS: int = -1 -@pytest.mark.dependency(depends="wait_for_healthchecks") -def 
ingest_tableau_cll_via_rest(wait_for_healthchecks) -> None: +def ingest_tableau_cll_via_rest(auth_session) -> None: ingest_file_via_rest( + auth_session, "tests/lineage/tableau_cll_mcps.json", ) @@ -808,12 +808,6 @@ def test_expectation(self, graph: DataHubGraph) -> bool: return False -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - # @tenacity.retry( # stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) # ) @@ -833,9 +827,8 @@ def test_healthchecks(wait_for_healthchecks): # TODO - convert this to range of 1 to 10 to make sure we can handle large graphs ], ) -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_lineage_via_node( - lineage_style: Scenario.LineageStyle, graph_level: int + graph_client: DataHubGraph, lineage_style: Scenario.LineageStyle, graph_level: int ) -> None: scenario: Scenario = Scenario( hop_platform_map={0: "mysql", 1: "snowflake"}, @@ -845,22 +838,21 @@ def test_lineage_via_node( ) # Create an emitter to the GMS REST API. - with get_default_graph() as graph: - emitter = graph - # emitter = DataHubConsoleEmitter() + emitter = graph_client + # emitter = DataHubConsoleEmitter() - # Emit metadata! - for mcp in scenario.get_entity_mcps(): - emitter.emit_mcp(mcp) + # Emit metadata! + for mcp in scenario.get_entity_mcps(): + emitter.emit_mcp(mcp) - for mcps in scenario.get_lineage_mcps(): - emitter.emit_mcp(mcps) + for mcps in scenario.get_lineage_mcps(): + emitter.emit_mcp(mcps) - wait_for_writes_to_sync() - try: - scenario.test_expectation(graph) - finally: - scenario.cleanup(DataHubGraphDeleteAgent(graph)) + wait_for_writes_to_sync() + try: + scenario.test_expectation(graph_client) + finally: + scenario.cleanup(DataHubGraphDeleteAgent(graph_client)) @pytest.fixture(scope="module") @@ -881,76 +873,78 @@ def destination_urn_fixture(): return "urn:li:dataset:(urn:li:dataPlatform:external,sales target %28us%29.xlsx.sheet1,PROD)" -@pytest.mark.dependency(depends=["test_healthchecks"]) @pytest.fixture(scope="module", autouse=False) def ingest_multipath_metadata( - chart_urn_fixture, intermediates_fixture, destination_urn_fixture + graph_client: DataHubGraph, + chart_urn_fixture, + intermediates_fixture, + destination_urn_fixture, ): fake_auditstamp = AuditStampClass( time=int(time.time() * 1000), actor="urn:li:corpuser:datahub", ) - with get_default_graph() as graph: - chart_urn = chart_urn_fixture - intermediates = intermediates_fixture - destination_urn = destination_urn_fixture + + chart_urn = chart_urn_fixture + intermediates = intermediates_fixture + destination_urn = destination_urn_fixture + for mcp in MetadataChangeProposalWrapper.construct_many( + entityUrn=destination_urn, + aspects=[ + DatasetPropertiesClass( + name="sales target (us).xlsx.sheet1", + ), + ], + ): + graph_client.emit_mcp(mcp) + + for intermediate in intermediates: for mcp in MetadataChangeProposalWrapper.construct_many( - entityUrn=destination_urn, + entityUrn=intermediate, aspects=[ DatasetPropertiesClass( - name="sales target (us).xlsx.sheet1", + name="intermediate", ), - ], - ): - graph.emit_mcp(mcp) - - for intermediate in intermediates: - for mcp in MetadataChangeProposalWrapper.construct_many( - entityUrn=intermediate, - aspects=[ - DatasetPropertiesClass( - name="intermediate", - ), - UpstreamLineageClass( - upstreams=[ - UpstreamClass( - dataset=destination_urn, - type="TRANSFORMED", - ) - ] - ), - ], - ): - graph.emit_mcp(mcp) - - for mcp 
in MetadataChangeProposalWrapper.construct_many( - entityUrn=chart_urn, - aspects=[ - ChartInfoClass( - title="chart", - description="chart", - lastModified=ChangeAuditStampsClass(created=fake_auditstamp), - inputEdges=[ - EdgeClass( - destinationUrn=intermediate_entity, - sourceUrn=chart_urn, + UpstreamLineageClass( + upstreams=[ + UpstreamClass( + dataset=destination_urn, + type="TRANSFORMED", ) - for intermediate_entity in intermediates - ], - ) + ] + ), ], ): - graph.emit_mcp(mcp) - wait_for_writes_to_sync() - yield - for urn in [chart_urn] + intermediates + [destination_urn]: - graph.delete_entity(urn, hard=True) - wait_for_writes_to_sync() + graph_client.emit_mcp(mcp) + + for mcp in MetadataChangeProposalWrapper.construct_many( + entityUrn=chart_urn, + aspects=[ + ChartInfoClass( + title="chart", + description="chart", + lastModified=ChangeAuditStampsClass(created=fake_auditstamp), + inputEdges=[ + EdgeClass( + destinationUrn=intermediate_entity, + sourceUrn=chart_urn, + ) + for intermediate_entity in intermediates + ], + ) + ], + ): + graph_client.emit_mcp(mcp) + wait_for_writes_to_sync() + yield + for urn in [chart_urn] + intermediates + [destination_urn]: + graph_client.delete_entity(urn, hard=True) + wait_for_writes_to_sync() # TODO: Reenable once fixed -# @pytest.mark.dependency(depends=["test_healthchecks"]) # def test_simple_lineage_multiple_paths( +# graph_client: DataHubGraph, # ingest_multipath_metadata, # chart_urn_fixture, # intermediates_fixture, @@ -960,7 +954,7 @@ def ingest_multipath_metadata( # intermediates = intermediates_fixture # destination_urn = destination_urn_fixture # results = search_across_lineage( -# get_default_graph(), +# graph_client, # chart_urn, # direction="UPSTREAM", # convert_schema_fields_to_datasets=True, diff --git a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py index 4a4bdca2e4592..5d6179de6be64 100644 --- a/smoke-test/tests/managed_ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py @@ -3,24 +3,12 @@ import pytest import tenacity -from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util +from tests.utils import get_sleep_info sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - -def _get_ingestionSources(frontend_session): +def _get_ingestionSources(auth_session): json_q = { "query": """query listIngestionSources($input: ListIngestionSourcesInput!) 
{\n listIngestionSources(input: $input) {\n @@ -35,8 +23,8 @@ def _get_ingestionSources(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -51,8 +39,8 @@ def _get_ingestionSources(frontend_session): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_ingestion_source_count(frontend_session, expected_count): - res_data = _get_ingestionSources(frontend_session) +def _ensure_ingestion_source_count(auth_session, expected_count): + res_data = _get_ingestionSources(auth_session) after_count = res_data["data"]["listIngestionSources"]["total"] assert after_count == expected_count return after_count @@ -61,7 +49,7 @@ def _ensure_ingestion_source_count(frontend_session, expected_count): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_secret_increased(frontend_session, before_count): +def _ensure_secret_increased(auth_session, before_count): json_q = { "query": """query listSecrets($input: ListSecretsInput!) {\n listSecrets(input: $input) {\n @@ -77,8 +65,8 @@ def _ensure_secret_increased(frontend_session, before_count): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -96,7 +84,7 @@ def _ensure_secret_increased(frontend_session, before_count): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_secret_not_present(frontend_session): +def _ensure_secret_not_present(auth_session): # Get the secret value back json_q = { "query": """query getSecretValues($input: GetSecretValuesInput!) {\n @@ -108,8 +96,8 @@ def _ensure_secret_not_present(frontend_session): "variables": {"input": {"secrets": ["SMOKE_TEST"]}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -128,7 +116,7 @@ def _ensure_secret_not_present(frontend_session): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_ingestion_source_present( - frontend_session, ingestion_source_urn, num_execs=None + auth_session, ingestion_source_urn, num_execs=None ): json_q = { "query": """query ingestionSource($urn: String!) 
{\n @@ -146,8 +134,8 @@ def _ensure_ingestion_source_present( "variables": {"urn": ingestion_source_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -168,7 +156,7 @@ def _ensure_ingestion_source_present( @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_execution_request_present(frontend_session, execution_request_urn): +def _ensure_execution_request_present(auth_session, execution_request_urn): json_q = { "query": """query executionRequest($urn: String!) {\n executionRequest(urn: $urn) {\n @@ -190,8 +178,8 @@ def _ensure_execution_request_present(frontend_session, execution_request_urn): "variables": {"urn": execution_request_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -203,8 +191,7 @@ def _ensure_execution_request_present(frontend_session, execution_request_urn): return res_data -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_list_get_remove_secret(frontend_session): +def test_create_list_get_remove_secret(auth_session): # Get count of existing secrets json_q = { "query": """query listSecrets($input: ListSecretsInput!) {\n @@ -221,8 +208,8 @@ def test_create_list_get_remove_secret(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -242,8 +229,8 @@ def test_create_list_get_remove_secret(frontend_session): "variables": {"input": {"name": "SMOKE_TEST", "value": "mytestvalue"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -256,7 +243,7 @@ def test_create_list_get_remove_secret(frontend_session): secret_urn = res_data["data"]["createSecret"] # Get new count of secrets - _ensure_secret_increased(frontend_session, before_count) + _ensure_secret_increased(auth_session, before_count) # Update existing secret json_q = { @@ -272,8 +259,8 @@ def test_create_list_get_remove_secret(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -296,8 +283,8 @@ def test_create_list_get_remove_secret(frontend_session): "variables": {"input": {"secrets": ["SMOKE_TEST"]}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -320,8 +307,8 @@ def test_create_list_get_remove_secret(frontend_session): "variables": {"urn": secret_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + 
f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -332,13 +319,13 @@ def test_create_list_get_remove_secret(frontend_session): assert "errors" not in res_data # Re-fetch the secret values and see that they are not there. - _ensure_secret_not_present(frontend_session) + _ensure_secret_not_present(auth_session) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_list_get_remove_ingestion_source(frontend_session): +@pytest.mark.dependency() +def test_create_list_get_remove_ingestion_source(auth_session): # Get count of existing ingestion sources - res_data = _get_ingestionSources(frontend_session) + res_data = _get_ingestionSources(auth_session) before_count = res_data["data"]["listIngestionSources"]["total"] @@ -362,8 +349,8 @@ def test_create_list_get_remove_ingestion_source(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -376,7 +363,7 @@ def test_create_list_get_remove_ingestion_source(frontend_session): ingestion_source_urn = res_data["data"]["createIngestionSource"] # Assert that there are more ingestion sources now. - after_count = _ensure_ingestion_source_count(frontend_session, before_count + 1) + after_count = _ensure_ingestion_source_count(auth_session, before_count + 1) # Get the ingestion source back json_q = { @@ -399,8 +386,8 @@ def test_create_list_get_remove_ingestion_source(frontend_session): "variables": {"urn": ingestion_source_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -431,8 +418,8 @@ def test_create_list_get_remove_ingestion_source(frontend_session): "variables": {"urn": ingestion_source_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -444,16 +431,15 @@ def test_create_list_get_remove_ingestion_source(frontend_session): assert "errors" not in res_data # Ensure the ingestion source has been removed. - _ensure_ingestion_source_count(frontend_session, after_count - 1) + _ensure_ingestion_source_count(auth_session, after_count - 1) @pytest.mark.dependency( depends=[ - "test_healthchecks", "test_create_list_get_remove_ingestion_source", ] ) -def test_create_list_get_ingestion_execution_request(frontend_session): +def test_create_list_get_ingestion_execution_request(auth_session): # Create new ingestion source json_q = { "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) 
{\n @@ -474,8 +460,8 @@ def test_create_list_get_ingestion_execution_request(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -495,8 +481,8 @@ def test_create_list_get_ingestion_execution_request(frontend_session): "variables": {"input": {"ingestionSourceUrn": ingestion_source_urn}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() @@ -510,9 +496,7 @@ def test_create_list_get_ingestion_execution_request(frontend_session): execution_request_urn = res_data["data"]["createIngestionExecutionRequest"] - res_data = _ensure_ingestion_source_present( - frontend_session, ingestion_source_urn, 1 - ) + res_data = _ensure_ingestion_source_present(auth_session, ingestion_source_urn, 1) ingestion_source = res_data["data"]["ingestionSource"] @@ -522,9 +506,7 @@ def test_create_list_get_ingestion_execution_request(frontend_session): ) # Get the ingestion request back via direct lookup - res_data = _ensure_execution_request_present( - frontend_session, execution_request_urn - ) + res_data = _ensure_execution_request_present(auth_session, execution_request_urn) execution_request = res_data["data"]["executionRequest"] assert execution_request["urn"] == execution_request_urn @@ -557,8 +539,8 @@ def test_create_list_get_ingestion_execution_request(frontend_session): "variables": {"urn": ingestion_source_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json_q + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json_q ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/openapi/test_openapi.py b/smoke-test/tests/openapi/test_openapi.py index 20398e0e58168..0217b185570be 100644 --- a/smoke-test/tests/openapi/test_openapi.py +++ b/smoke-test/tests/openapi/test_openapi.py @@ -3,40 +3,29 @@ import json import logging -import pytest from deepdiff import DeepDiff -import requests_wrapper as requests -from tests.utils import get_gms_url - logger = logging.getLogger(__name__) -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - def load_tests(fixture_glob="tests/openapi/**/*.json"): for test_fixture in glob.glob(fixture_glob): with open(test_fixture) as f: yield (test_fixture, json.load(f)) -def execute_request(request): - session = requests.Session() +def execute_request(auth_session, request): if "method" in request: method = request.pop("method") else: method = "post" - url = get_gms_url() + request.pop("url") + url = auth_session.gms_url() + request.pop("url") - return getattr(session, method)(url, **request) + return getattr(auth_session, method)(url, **request) -def evaluate_test(test_name, test_data): +def evaluate_test(auth_session, test_name, test_data): try: for idx, req_resp in enumerate(test_data): if "description" in req_resp["request"]: @@ -44,7 +33,7 @@ def evaluate_test(test_name, test_data): else: description = None url = req_resp["request"]["url"] - actual_resp = execute_request(req_resp["request"]) + actual_resp = execute_request(auth_session, req_resp["request"]) try: if "response" in req_resp and "status_codes" in req_resp["response"]: assert ( @@ -82,32 +71,32 @@ def evaluate_test(test_name, test_data): raise e -def run_tests(fixture_globs, num_workers=3): +def run_tests(auth_session, fixture_globs, num_workers=3): with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor: futures = [] for fixture_glob in fixture_globs: for test_fixture, test_data in load_tests(fixture_glob=fixture_glob): - futures.append(executor.submit(evaluate_test, test_fixture, test_data)) + futures.append( + executor.submit( + evaluate_test, auth_session, test_fixture, test_data + ) + ) for future in concurrent.futures.as_completed(futures): logger.info(future.result()) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_openapi_all(): - run_tests(fixture_globs=["tests/openapi/*/*.json"], num_workers=10) +def test_openapi_all(auth_session): + run_tests(auth_session, fixture_globs=["tests/openapi/*/*.json"], num_workers=10) -# @pytest.mark.dependency(depends=["test_healthchecks"]) -# def test_openapi_v1(): -# run_tests(fixture_glob="tests/openapi/v1/*.json", num_workers=4) +# def test_openapi_v1(auth_session): +# run_tests(auth_session, fixture_globs=["tests/openapi/v1/*.json"], num_workers=4) # # -# @pytest.mark.dependency(depends=["test_healthchecks"]) -# def test_openapi_v2(): -# run_tests(fixture_glob="tests/openapi/v2/*.json", num_workers=4) +# def test_openapi_v2(auth_session): +# run_tests(auth_session, fixture_globs=["tests/openapi/v2/*.json"], num_workers=4) # # -# @pytest.mark.dependency(depends=["test_healthchecks"]) -# def test_openapi_v3(): -# run_tests(fixture_glob="tests/openapi/v3/*.json", num_workers=4) +# def test_openapi_v3(auth_session): +# run_tests(auth_session, fixture_globs=["tests/openapi/v3/*.json"], num_workers=4) diff --git a/smoke-test/tests/openapi/v1/timeline.json b/smoke-test/tests/openapi/v1/timeline.json index e59407fd8188f..3d8c8c37c0971 100644 --- a/smoke-test/tests/openapi/v1/timeline.json +++ b/smoke-test/tests/openapi/v1/timeline.json @@ -339,7 +339,7 @@ "json": [ { "timestamp": 1723245258298, - "actor": "urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "0.0.0-computed", "semVerChange": "MINOR", "changeEvents": [ @@ -432,7 +432,7 @@ }, { "timestamp": 1723245269788, - "actor": "urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "1.0.0-computed", "semVerChange": "MAJOR", "changeEvents": [ @@ -483,7 +483,7 @@ }, { "timestamp": 1723245279320, - "actor": 
"urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "2.0.0-computed", "semVerChange": "MAJOR", "changeEvents": [ diff --git a/smoke-test/tests/openapi/v2/timeline.json b/smoke-test/tests/openapi/v2/timeline.json index 5521ee2376278..4565ae187cd2d 100644 --- a/smoke-test/tests/openapi/v2/timeline.json +++ b/smoke-test/tests/openapi/v2/timeline.json @@ -339,7 +339,7 @@ "json": [ { "timestamp": 1726608877854, - "actor": "urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "0.0.0-computed", "semVerChange": "MINOR", "changeEvents": [ @@ -432,7 +432,7 @@ }, { "timestamp": 1726608915493, - "actor": "urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "1.0.0-computed", "semVerChange": "MAJOR", "changeEvents": [ @@ -483,7 +483,7 @@ }, { "timestamp": 1726608930642, - "actor": "urn:li:corpuser:__datahub_system", + "actor": "urn:li:corpuser:datahub", "semVer": "2.0.0-computed", "semVerChange": "MAJOR", "changeEvents": [ diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index 9530edb760c13..25ce944158497 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -5,7 +5,7 @@ from datahub.emitter.mce_builder import make_tag_urn, make_term_urn, make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal -from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( AuditStampClass, GlobalTagsClass, @@ -20,11 +20,12 @@ def helper_test_entity_terms_patch( + graph_client: DataHubGraph, test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal], ): - def get_terms(graph, entity_urn): - return graph.get_aspect( + def get_terms(entity_urn): + return graph_client.get_aspect( entity_urn=entity_urn, aspect_type=GlossaryTermsClass, ) @@ -40,40 +41,38 @@ def get_terms(graph, entity_urn): ) mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_terms) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit_mcp(mcpw) - terms_read = get_terms(graph, test_entity_urn) - assert terms_read.terms[0].urn == term_urn - assert terms_read.terms[0].context == "test" + graph_client.emit_mcp(mcpw) + terms_read = get_terms(test_entity_urn) + assert terms_read.terms[0].urn == term_urn + assert terms_read.terms[0].context == "test" - new_term = GlossaryTermAssociationClass( - urn=make_term_urn(f"test-{uuid.uuid4()}") - ) - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "add_term") - for patch_mcp in patch_builder.add_term(new_term).build(): - graph.emit_mcp(patch_mcp) - pass + new_term = GlossaryTermAssociationClass(urn=make_term_urn(f"test-{uuid.uuid4()}")) + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "add_term") + for patch_mcp in patch_builder.add_term(new_term).build(): + graph_client.emit_mcp(patch_mcp) + pass - terms_read = get_terms(graph, test_entity_urn) + terms_read = get_terms(test_entity_urn) - assert terms_read.terms[0].urn == term_urn - assert terms_read.terms[0].context == "test" - assert terms_read.terms[1].urn == new_term.urn - assert terms_read.terms[1].context is None + assert terms_read.terms[0].urn == term_urn + assert terms_read.terms[0].context == "test" + assert terms_read.terms[1].urn == 
new_term.urn + assert terms_read.terms[1].context is None - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "remove_term") - for patch_mcp in patch_builder.remove_term(term_urn).build(): - graph.emit_mcp(patch_mcp) - pass + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_term") + for patch_mcp in patch_builder.remove_term(term_urn).build(): + graph_client.emit_mcp(patch_mcp) + pass - terms_read = get_terms(graph, test_entity_urn) - assert len(terms_read.terms) == 1 - assert terms_read.terms[0].urn == new_term.urn + terms_read = get_terms(test_entity_urn) + assert len(terms_read.terms) == 1 + assert terms_read.terms[0].urn == new_term.urn def helper_test_dataset_tags_patch( + graph_client: DataHubGraph, test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal], ): @@ -83,49 +82,49 @@ def helper_test_dataset_tags_patch( global_tags = GlobalTagsClass(tags=[tag_association]) mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=global_tags) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit_mcp(mcpw) - tags_read = graph.get_aspect( - entity_urn=test_entity_urn, - aspect_type=GlobalTagsClass, - ) - assert tags_read is not None - assert tags_read.tags[0].tag == tag_urn - assert tags_read.tags[0].context == "test" - - new_tag = TagAssociationClass(tag=make_tag_urn(f"test-{uuid.uuid4()}")) - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "add_tag") - for patch_mcp in patch_builder.add_tag(new_tag).build(): - graph.emit_mcp(patch_mcp) - pass - - tags_read = graph.get_aspect( - entity_urn=test_entity_urn, - aspect_type=GlobalTagsClass, - ) - assert tags_read is not None - assert tags_read.tags[0].tag == tag_urn - assert tags_read.tags[0].context == "test" - assert tags_read.tags[1].tag == new_tag.tag - assert tags_read.tags[1].context is None - - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "remove_tag") - for patch_mcp in patch_builder.remove_tag(tag_urn).build(): - graph.emit_mcp(patch_mcp) - pass - - tags_read = graph.get_aspect( - entity_urn=test_entity_urn, - aspect_type=GlobalTagsClass, - ) - assert tags_read is not None - assert len(tags_read.tags) == 1 - assert tags_read.tags[0].tag == new_tag.tag + graph_client.emit_mcp(mcpw) + tags_read = graph_client.get_aspect( + entity_urn=test_entity_urn, + aspect_type=GlobalTagsClass, + ) + assert tags_read is not None + assert tags_read.tags[0].tag == tag_urn + assert tags_read.tags[0].context == "test" + + new_tag = TagAssociationClass(tag=make_tag_urn(f"test-{uuid.uuid4()}")) + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "add_tag") + for patch_mcp in patch_builder.add_tag(new_tag).build(): + graph_client.emit_mcp(patch_mcp) + pass + + tags_read = graph_client.get_aspect( + entity_urn=test_entity_urn, + aspect_type=GlobalTagsClass, + ) + assert tags_read is not None + assert tags_read.tags[0].tag == tag_urn + assert tags_read.tags[0].context == "test" + assert tags_read.tags[1].tag == new_tag.tag + assert tags_read.tags[1].context is None + + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_tag") + for patch_mcp in patch_builder.remove_tag(tag_urn).build(): + graph_client.emit_mcp(patch_mcp) + pass + + tags_read = graph_client.get_aspect( + entity_urn=test_entity_urn, + aspect_type=GlobalTagsClass, + ) + assert tags_read is not None + assert len(tags_read.tags) == 1 
+ assert tags_read.tags[0].tag == new_tag.tag def helper_test_ownership_patch( + graph_client: DataHubGraph, test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal], ): @@ -140,42 +139,47 @@ def helper_test_ownership_patch( mcpw = MetadataChangeProposalWrapper( entityUrn=test_entity_urn, aspect=ownership_to_set ) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit_mcp(mcpw) - owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) - assert owner is not None - assert owner.owners[0].owner == make_user_urn("jdoe") - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "add_owner") - for patch_mcp in patch_builder.add_owner(owner_to_add).build(): - graph.emit_mcp(patch_mcp) + graph_client.emit_mcp(mcpw) + owner = graph_client.get_aspect( + entity_urn=test_entity_urn, aspect_type=OwnershipClass + ) + assert owner is not None + assert owner.owners[0].owner == make_user_urn("jdoe") + + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "add_owner") + for patch_mcp in patch_builder.add_owner(owner_to_add).build(): + graph_client.emit_mcp(patch_mcp) - owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) - assert owner is not None - assert len(owner.owners) == 2 + owner = graph_client.get_aspect( + entity_urn=test_entity_urn, aspect_type=OwnershipClass + ) + assert owner is not None + assert len(owner.owners) == 2 - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "remove_owner") - for patch_mcp in patch_builder.remove_owner(make_user_urn("gdoe")).build(): - graph.emit_mcp(patch_mcp) + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_owner") + for patch_mcp in patch_builder.remove_owner(make_user_urn("gdoe")).build(): + graph_client.emit_mcp(patch_mcp) - owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) - assert owner is not None - assert len(owner.owners) == 1 - assert owner.owners[0].owner == make_user_urn("jdoe") + owner = graph_client.get_aspect( + entity_urn=test_entity_urn, aspect_type=OwnershipClass + ) + assert owner is not None + assert len(owner.owners) == 1 + assert owner.owners[0].owner == make_user_urn("jdoe") def helper_test_custom_properties_patch( + graph_client: DataHubGraph, test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal], custom_properties_aspect_class: Type[_Aspect], base_aspect: _Aspect, ): - def get_custom_properties( - graph: DataHubGraph, entity_urn: str - ) -> Optional[Dict[str, str]]: - custom_properties_aspect = graph.get_aspect( + def get_custom_properties(entity_urn: str) -> Optional[Dict[str, str]]: + custom_properties_aspect = graph_client.get_aspect( entity_urn=entity_urn, aspect_type=custom_properties_aspect_class, ) @@ -190,72 +194,71 @@ def get_custom_properties( orig_aspect.customProperties = base_property_map mcpw = MetadataChangeProposalWrapper(entityUrn=test_entity_urn, aspect=orig_aspect) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit(mcpw) - # assert custom properties looks as expected - custom_properties = get_custom_properties(graph, test_entity_urn) - assert custom_properties - for k, v in base_property_map.items(): - assert custom_properties[k] == v - - new_properties = { - "test_property": "test_value", - "test_property1": "test_value1", - } - - entity_patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(entity_patch_builder, 
"add_custom_property") - for k, v in new_properties.items(): - entity_patch_builder.add_custom_property(k, v) - - for patch_mcp in entity_patch_builder.build(): - graph.emit_mcp(patch_mcp) - - custom_properties = get_custom_properties(graph, test_entity_urn) - - assert custom_properties is not None - for k, v in new_properties.items(): - assert custom_properties[k] == v - - # ensure exising properties were not touched - for k, v in base_property_map.items(): - assert custom_properties[k] == v - - # Remove property - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "remove_custom_property") - for patch_mcp in patch_builder.remove_custom_property("test_property").build(): - graph.emit_mcp(patch_mcp) - - custom_properties = get_custom_properties(graph, test_entity_urn) - - assert custom_properties is not None - assert "test_property" not in custom_properties - assert custom_properties["test_property1"] == "test_value1" - - # ensure exising properties were not touched - for k, v in base_property_map.items(): - assert custom_properties[k] == v - - # Replace custom properties - patch_builder = patch_builder_class(test_entity_urn) - assert hasattr(patch_builder, "set_custom_properties") - for patch_mcp in patch_builder.set_custom_properties(new_properties).build(): - graph.emit_mcp(patch_mcp) - - custom_properties = get_custom_properties(graph, test_entity_urn) - - assert custom_properties is not None - for k in base_property_map: - assert k not in custom_properties - for k, v in new_properties.items(): - assert custom_properties[k] == v - - # ensure existing fields were not touched - full_aspect: Optional[_Aspect] = graph.get_aspect( - test_entity_urn, custom_properties_aspect_class - ) + graph_client.emit(mcpw) + # assert custom properties looks as expected + custom_properties = get_custom_properties(test_entity_urn) + assert custom_properties + for k, v in base_property_map.items(): + assert custom_properties[k] == v + + new_properties = { + "test_property": "test_value", + "test_property1": "test_value1", + } + + entity_patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(entity_patch_builder, "add_custom_property") + for k, v in new_properties.items(): + entity_patch_builder.add_custom_property(k, v) + + for patch_mcp in entity_patch_builder.build(): + graph_client.emit_mcp(patch_mcp) + + custom_properties = get_custom_properties(test_entity_urn) + + assert custom_properties is not None + for k, v in new_properties.items(): + assert custom_properties[k] == v + + # ensure exising properties were not touched + for k, v in base_property_map.items(): + assert custom_properties[k] == v + + # Remove property + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_custom_property") + for patch_mcp in patch_builder.remove_custom_property("test_property").build(): + graph_client.emit_mcp(patch_mcp) + + custom_properties = get_custom_properties(test_entity_urn) + + assert custom_properties is not None + assert "test_property" not in custom_properties + assert custom_properties["test_property1"] == "test_value1" + + # ensure exising properties were not touched + for k, v in base_property_map.items(): + assert custom_properties[k] == v + + # Replace custom properties + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "set_custom_properties") + for patch_mcp in patch_builder.set_custom_properties(new_properties).build(): + graph_client.emit_mcp(patch_mcp) + + custom_properties = 
get_custom_properties(test_entity_urn) + + assert custom_properties is not None + for k in base_property_map: + assert k not in custom_properties + for k, v in new_properties.items(): + assert custom_properties[k] == v + + # ensure existing fields were not touched + full_aspect: Optional[_Aspect] = graph_client.get_aspect( + test_entity_urn, custom_properties_aspect_class + ) - assert full_aspect - for k, v in orig_aspect.__dict__.items(): - assert full_aspect.__dict__[k] == v + assert full_aspect + for k, v in orig_aspect.__dict__.items(): + assert full_aspect.__dict__[k] == v diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index ce63d4571d6c8..eb129e1e03212 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -2,7 +2,6 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig from datahub.metadata.schema_classes import ( DataJobInfoClass, DataJobInputOutputClass, @@ -30,25 +29,30 @@ def _make_test_datajob_urn( # Common Aspect Patch Tests # Ownership -def test_datajob_ownership_patch(wait_for_healthchecks): +def test_datajob_ownership_patch(graph_client): datajob_urn = _make_test_datajob_urn() - helper_test_ownership_patch(datajob_urn, DataJobPatchBuilder) + helper_test_ownership_patch(graph_client, datajob_urn, DataJobPatchBuilder) # Tags -def test_datajob_tags_patch(wait_for_healthchecks): - helper_test_dataset_tags_patch(_make_test_datajob_urn(), DataJobPatchBuilder) +def test_datajob_tags_patch(graph_client): + helper_test_dataset_tags_patch( + graph_client, _make_test_datajob_urn(), DataJobPatchBuilder + ) # Terms -def test_dataset_terms_patch(wait_for_healthchecks): - helper_test_entity_terms_patch(_make_test_datajob_urn(), DataJobPatchBuilder) +def test_dataset_terms_patch(graph_client): + helper_test_entity_terms_patch( + graph_client, _make_test_datajob_urn(), DataJobPatchBuilder + ) # Custom Properties -def test_custom_properties_patch(wait_for_healthchecks): +def test_custom_properties_patch(graph_client): orig_datajob_info = DataJobInfoClass(name="test_name", type="TestJobType") helper_test_custom_properties_patch( + graph_client, test_entity_urn=_make_test_datajob_urn(), patch_builder_class=DataJobPatchBuilder, custom_properties_aspect_class=DataJobInfoClass, @@ -58,7 +62,7 @@ def test_custom_properties_patch(wait_for_healthchecks): # Specific Aspect Patch Tests # Input/Output -def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): +def test_datajob_inputoutput_dataset_patch(graph_client): datajob_urn = _make_test_datajob_urn() other_dataset_urn = make_dataset_urn( @@ -79,59 +83,56 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): entityUrn=datajob_urn, aspect=inputoutput_lineage ) - with DataHubGraph(DataHubGraphConfig()) as graph: - graph.emit_mcp(mcpw) - inputoutput_lineage_read = graph.get_aspect( - entity_urn=datajob_urn, - aspect_type=DataJobInputOutputClass, - ) - assert inputoutput_lineage_read is not None - assert inputoutput_lineage_read.inputDatasetEdges is not None - assert ( - inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn - == other_dataset_urn - ) - - for patch_mcp in ( - DataJobPatchBuilder(datajob_urn) - .add_input_dataset(dataset_input_lineage_to_add) - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - inputoutput_lineage_read = 
graph.get_aspect( - entity_urn=datajob_urn, - aspect_type=DataJobInputOutputClass, - ) - assert inputoutput_lineage_read is not None - assert inputoutput_lineage_read.inputDatasetEdges is not None - assert len(inputoutput_lineage_read.inputDatasetEdges) == 2 - assert ( - inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn - == other_dataset_urn - ) - assert ( - inputoutput_lineage_read.inputDatasetEdges[1].destinationUrn - == patch_dataset_urn - ) - - for patch_mcp in ( - DataJobPatchBuilder(datajob_urn) - .remove_input_dataset(patch_dataset_urn) - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - inputoutput_lineage_read = graph.get_aspect( - entity_urn=datajob_urn, - aspect_type=DataJobInputOutputClass, - ) - assert inputoutput_lineage_read is not None - assert inputoutput_lineage_read.inputDatasetEdges is not None - assert len(inputoutput_lineage_read.inputDatasetEdges) == 1 - assert ( - inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn - == other_dataset_urn - ) + graph_client.emit_mcp(mcpw) + inputoutput_lineage_read = graph_client.get_aspect( + entity_urn=datajob_urn, + aspect_type=DataJobInputOutputClass, + ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None + assert ( + inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn + == other_dataset_urn + ) + + for patch_mcp in ( + DataJobPatchBuilder(datajob_urn) + .add_input_dataset(dataset_input_lineage_to_add) + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + inputoutput_lineage_read = graph_client.get_aspect( + entity_urn=datajob_urn, + aspect_type=DataJobInputOutputClass, + ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None + assert len(inputoutput_lineage_read.inputDatasetEdges) == 2 + assert ( + inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn + == other_dataset_urn + ) + assert ( + inputoutput_lineage_read.inputDatasetEdges[1].destinationUrn + == patch_dataset_urn + ) + + for patch_mcp in ( + DataJobPatchBuilder(datajob_urn).remove_input_dataset(patch_dataset_urn).build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + inputoutput_lineage_read = graph_client.get_aspect( + entity_urn=datajob_urn, + aspect_type=DataJobInputOutputClass, + ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None + assert len(inputoutput_lineage_read.inputDatasetEdges) == 1 + assert ( + inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn + == other_dataset_urn + ) diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 0c161fb0e6607..bafacbc8d2b0b 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -3,7 +3,7 @@ from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn, make_term_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( DatasetLineageTypeClass, DatasetPropertiesClass, @@ -26,30 +26,30 @@ # Common Aspect Patch Tests # Ownership -def test_dataset_ownership_patch(wait_for_healthchecks): +def test_dataset_ownership_patch(graph_client): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset{uuid.uuid4()}", env="PROD" ) - helper_test_ownership_patch(dataset_urn, 
DatasetPatchBuilder) + helper_test_ownership_patch(graph_client, dataset_urn, DatasetPatchBuilder) # Tags -def test_dataset_tags_patch(wait_for_healthchecks): +def test_dataset_tags_patch(graph_client): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) - helper_test_dataset_tags_patch(dataset_urn, DatasetPatchBuilder) + helper_test_dataset_tags_patch(graph_client, dataset_urn, DatasetPatchBuilder) # Terms -def test_dataset_terms_patch(wait_for_healthchecks): +def test_dataset_terms_patch(graph_client): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) - helper_test_entity_terms_patch(dataset_urn, DatasetPatchBuilder) + helper_test_entity_terms_patch(graph_client, dataset_urn, DatasetPatchBuilder) -def test_dataset_upstream_lineage_patch(wait_for_healthchecks): +def test_dataset_upstream_lineage_patch(graph_client: DataHubGraph): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -72,54 +72,53 @@ def test_dataset_upstream_lineage_patch(wait_for_healthchecks): ) mcpw = MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=upstream_lineage) - with get_default_graph() as graph: - graph.emit_mcp(mcpw) - upstream_lineage_read = graph.get_aspect_v2( - entity_urn=dataset_urn, - aspect_type=UpstreamLineageClass, - aspect="upstreamLineage", - ) - - assert upstream_lineage_read is not None - assert len(upstream_lineage_read.upstreams) > 0 - assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn - - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .add_upstream_lineage(upstream_lineage_to_add) - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - upstream_lineage_read = graph.get_aspect_v2( - entity_urn=dataset_urn, - aspect_type=UpstreamLineageClass, - aspect="upstreamLineage", - ) - - assert upstream_lineage_read is not None - assert len(upstream_lineage_read.upstreams) == 2 - assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn - assert upstream_lineage_read.upstreams[1].dataset == patch_dataset_urn - - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .remove_upstream_lineage(upstream_lineage_to_add.dataset) - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - upstream_lineage_read = graph.get_aspect_v2( - entity_urn=dataset_urn, - aspect_type=UpstreamLineageClass, - aspect="upstreamLineage", - ) - - assert upstream_lineage_read is not None - assert len(upstream_lineage_read.upstreams) == 1 - assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn + graph_client.emit_mcp(mcpw) + upstream_lineage_read = graph_client.get_aspect_v2( + entity_urn=dataset_urn, + aspect_type=UpstreamLineageClass, + aspect="upstreamLineage", + ) + + assert upstream_lineage_read is not None + assert len(upstream_lineage_read.upstreams) > 0 + assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn + + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .add_upstream_lineage(upstream_lineage_to_add) + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + upstream_lineage_read = graph_client.get_aspect_v2( + entity_urn=dataset_urn, + aspect_type=UpstreamLineageClass, + aspect="upstreamLineage", + ) + + assert upstream_lineage_read is not None + assert len(upstream_lineage_read.upstreams) == 2 + assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn + assert upstream_lineage_read.upstreams[1].dataset == patch_dataset_urn + + for patch_mcp in ( + 
DatasetPatchBuilder(dataset_urn) + .remove_upstream_lineage(upstream_lineage_to_add.dataset) + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + upstream_lineage_read = graph_client.get_aspect_v2( + entity_urn=dataset_urn, + aspect_type=UpstreamLineageClass, + aspect="upstreamLineage", + ) + + assert upstream_lineage_read is not None + assert len(upstream_lineage_read.upstreams) == 1 + assert upstream_lineage_read.upstreams[0].dataset == other_dataset_urn def get_field_info( @@ -139,7 +138,7 @@ def get_field_info( return None -def test_field_terms_patch(wait_for_healthchecks): +def test_field_terms_patch(graph_client: DataHubGraph): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -155,52 +154,49 @@ def test_field_terms_patch(wait_for_healthchecks): ) mcpw = MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=editable_field) - with get_default_graph() as graph: - graph.emit_mcp(mcpw) - field_info = get_field_info(graph, dataset_urn, field_path) - assert field_info - assert field_info.description == "This is a test field" - - new_term = GlossaryTermAssociationClass( - urn=make_term_urn(f"test-{uuid.uuid4()}") - ) - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .for_field(field_path) - .add_term(new_term) - .parent() - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - field_info = get_field_info(graph, dataset_urn, field_path) - - assert field_info - assert field_info.description == "This is a test field" - assert field_info.glossaryTerms is not None - assert len(field_info.glossaryTerms.terms) == 1 - assert field_info.glossaryTerms.terms[0].urn == new_term.urn - - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .for_field(field_path) - .remove_term(new_term.urn) - .parent() - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - field_info = get_field_info(graph, dataset_urn, field_path) - - assert field_info - assert field_info.description == "This is a test field" - assert field_info.glossaryTerms is not None - assert len(field_info.glossaryTerms.terms) == 0 - - -def test_field_tags_patch(wait_for_healthchecks): + graph_client.emit_mcp(mcpw) + field_info = get_field_info(graph_client, dataset_urn, field_path) + assert field_info + assert field_info.description == "This is a test field" + + new_term = GlossaryTermAssociationClass(urn=make_term_urn(f"test-{uuid.uuid4()}")) + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .for_field(field_path) + .add_term(new_term) + .parent() + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + field_info = get_field_info(graph_client, dataset_urn, field_path) + + assert field_info + assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None + assert len(field_info.glossaryTerms.terms) == 1 + assert field_info.glossaryTerms.terms[0].urn == new_term.urn + + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .for_field(field_path) + .remove_term(new_term.urn) + .parent() + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + field_info = get_field_info(graph_client, dataset_urn, field_path) + + assert field_info + assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None + assert len(field_info.glossaryTerms.terms) == 0 + + +def test_field_tags_patch(graph_client: DataHubGraph): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -216,69 +212,68 @@ def 
test_field_tags_patch(wait_for_healthchecks): ) mcpw = MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=editable_field) - with get_default_graph() as graph: - graph.emit_mcp(mcpw) - field_info = get_field_info(graph, dataset_urn, field_path) - assert field_info - assert field_info.description == "This is a test field" - - new_tag_urn = make_tag_urn(tag=f"testTag-{uuid.uuid4()}") - - new_tag = TagAssociationClass(tag=new_tag_urn, context="test") - - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .for_field(field_path) - .add_tag(new_tag) - .parent() - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - field_info = get_field_info(graph, dataset_urn, field_path) - - assert field_info - assert field_info.description == "This is a test field" - assert field_info.globalTags is not None - assert len(field_info.globalTags.tags) == 1 - assert field_info.globalTags.tags[0].tag == new_tag.tag - - # Add the same tag again and verify that it doesn't get added - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .for_field(field_path) - .add_tag(new_tag) - .parent() - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - field_info = get_field_info(graph, dataset_urn, field_path) - - assert field_info - assert field_info.description == "This is a test field" - assert field_info.globalTags is not None - assert len(field_info.globalTags.tags) == 1 - assert field_info.globalTags.tags[0].tag == new_tag.tag - - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .for_field(field_path) - .remove_tag(new_tag.tag) - .parent() - .build() - ): - graph.emit_mcp(patch_mcp) - pass - - field_info = get_field_info(graph, dataset_urn, field_path) - - assert field_info - assert field_info.description == "This is a test field" - assert field_info.globalTags is not None - assert len(field_info.globalTags.tags) == 0 + graph_client.emit_mcp(mcpw) + field_info = get_field_info(graph_client, dataset_urn, field_path) + assert field_info + assert field_info.description == "This is a test field" + + new_tag_urn = make_tag_urn(tag=f"testTag-{uuid.uuid4()}") + + new_tag = TagAssociationClass(tag=new_tag_urn, context="test") + + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .for_field(field_path) + .add_tag(new_tag) + .parent() + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + field_info = get_field_info(graph_client, dataset_urn, field_path) + + assert field_info + assert field_info.description == "This is a test field" + assert field_info.globalTags is not None + assert len(field_info.globalTags.tags) == 1 + assert field_info.globalTags.tags[0].tag == new_tag.tag + + # Add the same tag again and verify that it doesn't get added + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .for_field(field_path) + .add_tag(new_tag) + .parent() + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + field_info = get_field_info(graph_client, dataset_urn, field_path) + + assert field_info + assert field_info.description == "This is a test field" + assert field_info.globalTags is not None + assert len(field_info.globalTags.tags) == 1 + assert field_info.globalTags.tags[0].tag == new_tag.tag + + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .for_field(field_path) + .remove_tag(new_tag.tag) + .parent() + .build() + ): + graph_client.emit_mcp(patch_mcp) + pass + + field_info = get_field_info(graph_client, dataset_urn, field_path) + + assert field_info + assert field_info.description == "This is a test field" + assert field_info.globalTags is not None + assert 
len(field_info.globalTags.tags) == 0 def get_custom_properties( @@ -292,7 +287,7 @@ def get_custom_properties( return dataset_properties.customProperties -def test_custom_properties_patch(wait_for_healthchecks): +def test_custom_properties_patch(graph_client: DataHubGraph): dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -300,34 +295,32 @@ def test_custom_properties_patch(wait_for_healthchecks): name="test_name", description="test_description" ) helper_test_custom_properties_patch( + graph_client, test_entity_urn=dataset_urn, patch_builder_class=DatasetPatchBuilder, custom_properties_aspect_class=DatasetPropertiesClass, base_aspect=orig_dataset_properties, ) - with get_default_graph() as graph: - # Patch custom properties along with name - for patch_mcp in ( - DatasetPatchBuilder(dataset_urn) - .set_description("This is a new description") - .add_custom_property("test_description_property", "test_description_value") - .build() - ): - graph.emit_mcp(patch_mcp) - - dataset_properties: Optional[DatasetPropertiesClass] = graph.get_aspect( - dataset_urn, DatasetPropertiesClass - ) + # Patch custom properties along with name + for patch_mcp in ( + DatasetPatchBuilder(dataset_urn) + .set_description("This is a new description") + .add_custom_property("test_description_property", "test_description_value") + .build() + ): + graph_client.emit_mcp(patch_mcp) + + dataset_properties: Optional[DatasetPropertiesClass] = graph_client.get_aspect( + dataset_urn, DatasetPropertiesClass + ) - assert dataset_properties - assert dataset_properties.name == orig_dataset_properties.name - assert dataset_properties.description == "This is a new description" + assert dataset_properties + assert dataset_properties.name == orig_dataset_properties.name + assert dataset_properties.description == "This is a new description" - custom_properties = get_custom_properties(graph, dataset_urn) + custom_properties = get_custom_properties(graph_client, dataset_urn) - assert custom_properties is not None + assert custom_properties is not None - assert ( - custom_properties["test_description_property"] == "test_description_value" - ) + assert custom_properties["test_description_property"] == "test_description_value" diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index 186550482190c..5f691aa7b3e22 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,41 +1,17 @@ import pytest import tenacity -from tests.utils import ( - get_frontend_session, - get_frontend_url, - get_root_urn, - get_sleep_info, - wait_for_healthcheck_util, -) +from tests.utils import get_root_urn, get_sleep_info TEST_POLICY_NAME = "Updated Platform Policy" sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - -@pytest.fixture(scope="session") -def frontend_session(wait_for_healthchecks): - yield get_frontend_session() - - -@pytest.mark.dependency(depends=["test_healthchecks"]) -@pytest.fixture(scope="class", autouse=True) -def test_frontend_list_policies(frontend_session): +@pytest.fixture(scope="module", autouse=True) +def test_frontend_list_policies(auth_session): """Fixture to execute setup before and tear down after all tests are run""" - res_data = listPolicies(frontend_session) + res_data = listPolicies(auth_session) assert res_data assert res_data["data"] @@ -55,7 +31,7 @@ def test_frontend_list_policies(frontend_session): # Run remaining tests. yield - res_data = listPolicies(frontend_session) + res_data = listPolicies(auth_session) assert res_data assert res_data["data"] @@ -72,8 +48,8 @@ def test_frontend_list_policies(frontend_session): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_policy_present(frontend_session, new_urn): - res_data = listPolicies(frontend_session) +def _ensure_policy_present(auth_session, new_urn): + res_data = listPolicies(auth_session) assert res_data assert res_data["data"] @@ -93,8 +69,7 @@ def _ensure_policy_present(frontend_session, new_urn): assert result[0]["actors"]["allUsers"] -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_frontend_policy_operations(frontend_session): +def test_frontend_policy_operations(auth_session): json = { "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n createPolicy(input: $input) }""", @@ -116,7 +91,9 @@ def test_frontend_policy_operations(frontend_session): }, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) response.raise_for_status() res_data = response.json() @@ -146,8 +123,8 @@ def test_frontend_policy_operations(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_json ) response.raise_for_status() res_data = response.json() @@ -158,7 +135,7 @@ def test_frontend_policy_operations(frontend_session): assert res_data["data"]["updatePolicy"] assert res_data["data"]["updatePolicy"] == new_urn - _ensure_policy_present(frontend_session, new_urn) + _ensure_policy_present(auth_session, new_urn) # Now test that the policy can be deleted json = { @@ -167,11 +144,13 @@ def test_frontend_policy_operations(frontend_session): "variables": {"urn": new_urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) response.raise_for_status() res_data = response.json() - res_data = listPolicies(frontend_session) + res_data = listPolicies(auth_session) assert res_data assert res_data["data"] @@ -185,7 +164,7 @@ def test_frontend_policy_operations(frontend_session): assert len(list(result)) == 0 -def listPolicies(session): +def listPolicies(auth_session): json = { "query": """query listPolicies($input: ListPoliciesInput!) 
{\n listPolicies(input: $input) {\n @@ -222,7 +201,9 @@ def listPolicies(session): } }, } - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) response.raise_for_status() return response.json() diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py index bce7b8a238c38..320dd0ab29f58 100644 --- a/smoke-test/tests/privileges/test_privileges.py +++ b/smoke-test/tests/privileges/test_privileges.py @@ -22,7 +22,6 @@ get_frontend_url, get_sleep_info, login_as, - wait_for_healthcheck_util, wait_for_writes_to_sync, ) @@ -31,24 +30,11 @@ sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - -@pytest.fixture(scope="session") -def admin_session(wait_for_healthchecks): +@pytest.fixture(scope="module") +def admin_session(auth_session): yield get_frontend_session() -@pytest.mark.dependency(depends=["test_healthchecks"]) @pytest.fixture(scope="module", autouse=True) def privileges_and_test_user_setup(admin_session): """Fixture to execute setup before and tear down after all tests are run""" @@ -163,7 +149,6 @@ def _ensure_can_create_user_policy(session, json): return res_data["data"]["createPolicy"] -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_secrets(): (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) @@ -218,7 +203,6 @@ def test_privilege_to_create_and_manage_secrets(): _ensure_cant_perform_action(user_session, create_secret, "createSecret") -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_ingestion_source(): (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) @@ -327,7 +311,6 @@ def test_privilege_to_create_and_manage_ingestion_source(): ) -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_revoke_personal_access_tokens(): (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) @@ -413,7 +396,6 @@ def test_privilege_to_create_and_revoke_personal_access_tokens(): _ensure_cant_perform_action(user_session, create_access_token, "createAccessToken") -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_policies(): (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) @@ -509,7 +491,6 @@ def test_privilege_to_create_and_manage_policies(): _ensure_cant_perform_action(user_session, create_policy, "createPolicy") -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_from_group_role_can_create_and_manage_secret(): (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) diff --git a/smoke-test/tests/read_only/test_analytics.py b/smoke-test/tests/read_only/test_analytics.py index e6a1252c04332..a7cbadac30f80 100644 --- a/smoke-test/tests/read_only/test_analytics.py +++ b/smoke-test/tests/read_only/test_analytics.py @@ -1,10 +1,8 @@ import pytest -from tests.utils import get_frontend_url - @pytest.mark.read_only -def test_highlights_is_accessible(frontend_session): +def 
test_highlights_is_accessible(auth_session): json = { "query": """ query getHighlights { @@ -16,13 +14,15 @@ def test_highlights_is_accessible(frontend_session): } """, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) res_json = response.json() assert res_json, f"Received JSON was {res_json}" @pytest.mark.read_only -def test_analytics_chart_is_accessible(frontend_session): +def test_analytics_chart_is_accessible(auth_session): json = { "query": """ query getAnalyticsCharts { @@ -33,13 +33,15 @@ def test_analytics_chart_is_accessible(frontend_session): } """, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) res_json = response.json() assert res_json, f"Received JSON was {res_json}" @pytest.mark.read_only -def test_metadata_analytics_chart_is_accessible(frontend_session): +def test_metadata_analytics_chart_is_accessible(auth_session): json = { "query": """ query getMetadataAnalyticsCharts($input: MetadataAnalyticsInput!) { @@ -51,6 +53,8 @@ def test_metadata_analytics_chart_is_accessible(frontend_session): """, "variables": {"input": {"query": "*"}}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) res_json = response.json() assert res_json, f"Received JSON was {res_json}" diff --git a/smoke-test/tests/read_only/test_ingestion_list.py b/smoke-test/tests/read_only/test_ingestion_list.py index 17868b1b6080d..36388512c6a45 100644 --- a/smoke-test/tests/read_only/test_ingestion_list.py +++ b/smoke-test/tests/read_only/test_ingestion_list.py @@ -1,11 +1,10 @@ import pytest from tests.test_result_msg import add_datahub_stats -from tests.utils import get_frontend_url @pytest.mark.read_only -def test_policies_are_accessible(frontend_session): +def test_policies_are_accessible(auth_session): json = { "query": """ query listIngestionSources($input: ListIngestionSourcesInput!) { @@ -25,7 +24,9 @@ def test_policies_are_accessible(frontend_session): "variables": {"input": {"query": "*"}}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) res_json = response.json() assert res_json, f"Received JSON was {res_json}" diff --git a/smoke-test/tests/read_only/test_policies.py b/smoke-test/tests/read_only/test_policies.py index 8e4d6a4be29a4..306089b92d964 100644 --- a/smoke-test/tests/read_only/test_policies.py +++ b/smoke-test/tests/read_only/test_policies.py @@ -1,11 +1,10 @@ import pytest from tests.test_result_msg import add_datahub_stats -from tests.utils import get_frontend_url @pytest.mark.read_only -def test_policies_are_accessible(frontend_session): +def test_policies_are_accessible(auth_session): json = { "query": """ query listPolicies($input: ListPoliciesInput!) 
{ @@ -22,7 +21,9 @@ def test_policies_are_accessible(frontend_session): "variables": {"input": {"query": "*"}}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) res_json = response.json() assert res_json, f"Received JSON was {res_json}" diff --git a/smoke-test/tests/read_only/test_search.py b/smoke-test/tests/read_only/test_search.py index 3b9635f3da2cd..36ecf68395f91 100644 --- a/smoke-test/tests/read_only/test_search.py +++ b/smoke-test/tests/read_only/test_search.py @@ -1,8 +1,7 @@ import pytest -import requests from tests.test_result_msg import add_datahub_stats -from tests.utils import get_frontend_session, get_frontend_url, get_gms_url +from tests.utils import get_gms_url BASE_URL_V3 = f"{get_gms_url()}/openapi/v3" @@ -30,7 +29,7 @@ } -def _get_search_result(frontend_session, entity: str): +def _get_search_result(auth_session, entity: str): json = { "query": """ query search($input: SearchInput!) { @@ -46,7 +45,9 @@ def _get_search_result(frontend_session, entity: str): """, "variables": {"input": {"type": ENTITY_TO_MAP.get(entity), "query": "*"}}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) print(f"Response text was {response.text}") res_data = response.json() assert res_data, f"response data was {res_data}" @@ -77,9 +78,8 @@ def _get_search_result(frontend_session, entity: str): ("mlModel", "mlModel"), ], ) -def test_search_works(entity_type, api_name): - frontend_session = get_frontend_session() - search_result = _get_search_result(frontend_session, entity_type) +def test_search_works(auth_session, entity_type, api_name): + search_result = _get_search_result(auth_session, entity_type) num_entities = search_result["total"] add_datahub_stats(f"num-{entity_type}", num_entities) if num_entities == 0: @@ -104,7 +104,9 @@ def test_search_works(entity_type, api_name): "variables": {"input": first_urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) response.raise_for_status() res_data = response.json() assert res_data["data"], f"res_data was {res_data}" @@ -133,9 +135,8 @@ def test_search_works(entity_type, api_name): "mlModel", ], ) -def test_openapi_v3_entity(entity_type): - frontend_session = get_frontend_session() - search_result = _get_search_result(frontend_session, entity_type) +def test_openapi_v3_entity(auth_session, entity_type): + search_result = _get_search_result(auth_session, entity_type) num_entities = search_result["total"] if num_entities == 0: print(f"[WARN] No results for {entity_type}") @@ -144,9 +145,8 @@ def test_openapi_v3_entity(entity_type): first_urn = entities[0]["entity"]["urn"] - session = requests.Session() url = f"{BASE_URL_V3}/entity/{entity_type}/{first_urn}" - response = session.get(url, headers=default_headers) + response = auth_session.get(url, headers=default_headers) response.raise_for_status() actual_data = response.json() print(f"Entity Data for URN {first_urn}: {actual_data}") diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 1fd43f884323c..12ff04965548f 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -2,26 
+2,18 @@ import re import pytest -import requests - -from tests.utils import get_gms_url, wait_for_healthcheck_util # Kept separate so that it does not cause failures in PRs DATAHUB_VERSION = os.getenv("TEST_DATAHUB_VERSION") -@pytest.mark.read_only -def test_services_up(): - wait_for_healthcheck_util() - - def looks_like_a_short_sha(sha: str) -> bool: return len(sha) == 7 and re.match(r"[0-9a-f]{7}", sha) is not None @pytest.mark.read_only -def test_gms_config_accessible() -> None: - gms_config = requests.get(f"{get_gms_url()}/config").json() +def test_gms_config_accessible(auth_session) -> None: + gms_config = auth_session.get(f"{auth_session.gms_url()}/config").json() assert gms_config is not None if DATAHUB_VERSION is not None: diff --git a/smoke-test/tests/schema_fields/schema_evolution.py b/smoke-test/tests/schema_fields/schema_evolution.py index 256ae3c29ac04..5481742969f5d 100644 --- a/smoke-test/tests/schema_fields/schema_evolution.py +++ b/smoke-test/tests/schema_fields/schema_evolution.py @@ -6,10 +6,10 @@ from datahub.cli.cli_utils import get_aspects_for_entity from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph +from datahub.ingestion.graph.client import DataHubGraph from tenacity import retry, stop_after_delay, wait_fixed -from tests.utils import get_datahub_graph, ingest_file_via_rest, wait_for_writes_to_sync +from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync _MAX_DELAY_UNTIL_WRITES_VISIBLE_SECS = 30 _ATTEMPT_RETRY_INTERVAL_SECS = 1 @@ -62,15 +62,14 @@ def _create_schema_with_fields( return schema -@pytest.fixture(autouse=False) -def test_setup(): +@pytest.fixture() +def test_setup(auth_session, graph_client): """Fixture data""" - client = get_datahub_graph() - session = client._session - gms_host = client.config.server + session = graph_client._session + gms_host = graph_client.config.server ingest_file_via_rest( - "tests/schema_fields/schema_field_side_effect_data.json" + auth_session, "tests/schema_fields/schema_field_side_effect_data.json" ).config.run_id assert "schemaMetadata" in get_aspects_for_entity( @@ -129,7 +128,9 @@ def assert_schema_field_soft_deleted(graph: DataHubGraph, urn: str, field_path: FieldPathStyle.FLAT, ], ) -def test_schema_evolution_field_dropped(field_path_style: FieldPathStyle): +def test_schema_evolution_field_dropped( + graph_client: DataHubGraph, field_path_style: FieldPathStyle +): """ Test that schema evolution works as expected 1. 
Create a schema with 2 fields @@ -143,43 +144,43 @@ def test_schema_evolution_field_dropped(field_path_style: FieldPathStyle): urn = make_dataset_urn("bigquery", f"my_dataset.my_table.{now}") print(urn) - with get_default_graph() as graph: - schema_with_2_fields = _create_schema_with_fields( - urn, 2, field_path_style=field_path_style - ) - field_names = [field.fieldPath for field in schema_with_2_fields.fields] - graph.emit( - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=schema_with_2_fields, - ) + + schema_with_2_fields = _create_schema_with_fields( + urn, 2, field_path_style=field_path_style + ) + field_names = [field.fieldPath for field in schema_with_2_fields.fields] + graph_client.emit( + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=schema_with_2_fields, ) + ) - for field_name in field_names: - print("Checking field: ", field_name) - assert_schema_field_exists(graph, urn, field_name) + for field_name in field_names: + print("Checking field: ", field_name) + assert_schema_field_exists(graph_client, urn, field_name) - # Evolve the schema - schema_with_1_field = _create_schema_with_fields( - urn, 1, field_path_style=field_path_style - ) - new_field_name = schema_with_1_field.fields[0].fieldPath + # Evolve the schema + schema_with_1_field = _create_schema_with_fields( + urn, 1, field_path_style=field_path_style + ) + new_field_name = schema_with_1_field.fields[0].fieldPath - field_names.remove(new_field_name) - removed_field_name = field_names[0] + field_names.remove(new_field_name) + removed_field_name = field_names[0] - graph.emit( - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=schema_with_1_field, - ) + graph_client.emit( + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=schema_with_1_field, ) + ) - assert_schema_field_exists(graph, urn, new_field_name) - assert_schema_field_soft_deleted(graph, urn, removed_field_name) + assert_schema_field_exists(graph_client, urn, new_field_name) + assert_schema_field_soft_deleted(graph_client, urn, removed_field_name) -def test_soft_deleted_entity(): +def test_soft_deleted_entity(graph_client: DataHubGraph): """ Test that we if there is a soft deleted dataset, its schema fields are initialized with soft deleted status @@ -190,41 +191,35 @@ def test_soft_deleted_entity(): urn = make_dataset_urn("bigquery", f"my_dataset.my_table.{now}") print(urn) - with get_default_graph() as graph: - schema_with_2_fields = _create_schema_with_fields(urn, 2) - field_names = [field.fieldPath for field in schema_with_2_fields.fields] - graph.emit( - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=schema_with_2_fields, - ) + + schema_with_2_fields = _create_schema_with_fields(urn, 2) + field_names = [field.fieldPath for field in schema_with_2_fields.fields] + graph_client.emit( + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=schema_with_2_fields, ) + ) - for field_name in field_names: - print("Checking field: ", field_name) - assert_schema_field_exists(graph, urn, field_name) + for field_name in field_names: + print("Checking field: ", field_name) + assert_schema_field_exists(graph_client, urn, field_name) - # Soft delete the dataset - graph.emit( - MetadataChangeProposalWrapper( - entityUrn=urn, - aspect=models.StatusClass(removed=True), - ) + # Soft delete the dataset + graph_client.emit( + MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=models.StatusClass(removed=True), ) + ) - # Check that the fields are soft deleted - for field_name in field_names: - assert_schema_field_soft_deleted(graph, 
urn, field_name) + # Check that the fields are soft deleted + for field_name in field_names: + assert_schema_field_soft_deleted(graph_client, urn, field_name) # Note: Does not execute deletes, too slow for CI @pytest.mark.dependency() -def test_large_schema(test_setup): +def test_large_schema(graph_client: DataHubGraph, test_setup): wait_for_writes_to_sync() - with get_default_graph() as graph: - assert_schema_field_exists(graph, large_dataset_urn, "last_of.6800_cols") - - -if __name__ == "__main__": - test_schema_evolution_field_dropped() - test_soft_deleted_entity() + assert_schema_field_exists(graph_client, large_dataset_urn, "last_of.6800_cols") diff --git a/smoke-test/tests/schema_fields/test_schemafields.py b/smoke-test/tests/schema_fields/test_schemafields.py index cd282db81ff72..30d705ef2b4e7 100644 --- a/smoke-test/tests/schema_fields/test_schemafields.py +++ b/smoke-test/tests/schema_fields/test_schemafields.py @@ -10,12 +10,11 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.sink.file import FileSink, FileSinkConfig from tests.utils import ( delete_urns_from_file, - get_gms_url, get_sleep_info, ingest_file_via_rest, wait_for_writes_to_sync, @@ -101,27 +100,23 @@ def create_test_data(filename: str, chart_urn: str, upstream_schema_field_urn: s sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request, chart_urn, upstream_schema_field_urn): +@pytest.fixture(scope="module") +def ingest_cleanup_data( + auth_session, graph_client, request, chart_urn, upstream_schema_field_urn +): new_file, filename = tempfile.mkstemp(suffix=".json") try: create_test_data(filename, chart_urn, upstream_schema_field_urn) print("ingesting schema fields test data") - ingest_file_via_rest(filename) + ingest_file_via_rest(auth_session, filename) yield print("removing schema fields test data") - delete_urns_from_file(filename) + delete_urns_from_file(graph_client, filename) wait_for_writes_to_sync() finally: os.remove(filename) -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - def get_gql_query(filename: str) -> str: with open(filename) as fp: return fp.read() @@ -165,10 +160,9 @@ def validate_schema_field_urn_for_chart( # @tenacity.retry( # stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) def test_schema_field_gql_mapper_for_charts( - ingest_cleanup_data, chart_urn, upstream_schema_field_urn + graph_client, ingest_cleanup_data, chart_urn, upstream_schema_field_urn ): - graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) - - validate_schema_field_urn_for_chart(graph, chart_urn, upstream_schema_field_urn) + validate_schema_field_urn_for_chart( + graph_client, chart_urn, upstream_schema_field_urn + ) diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 2746baf89600e..85924569ba9f0 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -5,7 +5,7 @@ make_data_job_urn_with_flow, make_dataset_urn, ) -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( DateTypeClass, NumberTypeClass, @@ -116,11 +116,11 @@ ) -def ingest_data_job_change(emitter: DatahubRestEmitter) -> None: +def ingest_data_job_change(graph_client: DataHubGraph) -> None: # Case 2. Data job changes from temperature_etl_1 to temperature_etl_2 - emit_mcps(emitter, create_node(DAILY_TEMPERATURE_DATASET)) - emit_mcps(emitter, create_node(MONTHLY_TEMPERATURE_DATASET)) - emit_mcps(emitter, create_nodes_and_edges(AIRFLOW_SNOWFLAKE_ETL)) + emit_mcps(graph_client, create_node(DAILY_TEMPERATURE_DATASET)) + emit_mcps(graph_client, create_node(MONTHLY_TEMPERATURE_DATASET)) + emit_mcps(graph_client, create_nodes_and_edges(AIRFLOW_SNOWFLAKE_ETL)) def get_data_job_change_urns() -> List[str]: diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index 4a8da1fcf0588..36fe3d19eff41 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -1,7 +1,7 @@ from typing import List from datahub.emitter.mce_builder import make_dataset_urn -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( NumberTypeClass, SchemaFieldDataTypeClass, @@ -74,11 +74,11 @@ ) -def ingest_dataset_join_change(emitter: DatahubRestEmitter) -> None: +def ingest_dataset_join_change(graph_client: DataHubGraph) -> None: # Case 3. gnp has two upstreams originally (gdp and factor_income), but later factor_income is removed. 
- emit_mcps(emitter, create_node(GDP_DATASET)) - emit_mcps(emitter, create_node(FACTOR_INCOME_DATASET)) - emit_mcps(emitter, create_node(GNP_DATASET)) + emit_mcps(graph_client, create_node(GDP_DATASET)) + emit_mcps(graph_client, create_node(FACTOR_INCOME_DATASET)) + emit_mcps(graph_client, create_node(GNP_DATASET)) d3_d1_edge: UpstreamClass = create_upstream_edge( GDP_DATASET_URN, TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, @@ -100,7 +100,7 @@ def ingest_dataset_join_change(emitter: DatahubRestEmitter) -> None: TIMESTAMP_MILLIS_ONE_DAY_AGO, run_id="gdp_gnp", ) - emitter.emit_mcp(case_3_mcp) + graph_client.emit_mcp(case_3_mcp) def get_dataset_join_change_urns() -> List[str]: diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py index 143c65c082656..9ff0cfcc1182f 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -5,7 +5,7 @@ make_data_job_urn_with_flow, make_dataset_urn, ) -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( NumberTypeClass, SchemaFieldDataTypeClass, @@ -116,13 +116,13 @@ ) -def ingest_input_datasets_change(emitter: DatahubRestEmitter) -> None: +def ingest_input_datasets_change(graph_client: DataHubGraph) -> None: # Case 2. transactions_etl has one upstream originally (transactions), but later has both transactions and # user_profile. - emit_mcps(emitter, create_node(TRANSACTIONS_DATASET)) - emit_mcps(emitter, create_node(USER_PROFILE_DATASET)) - emit_mcps(emitter, create_node(AGGREGATED_TRANSACTIONS_DATASET)) - emit_mcps(emitter, create_nodes_and_edges(AIRFLOW_BQ_ETL)) + emit_mcps(graph_client, create_node(TRANSACTIONS_DATASET)) + emit_mcps(graph_client, create_node(USER_PROFILE_DATASET)) + emit_mcps(graph_client, create_node(AGGREGATED_TRANSACTIONS_DATASET)) + emit_mcps(graph_client, create_nodes_and_edges(AIRFLOW_BQ_ETL)) def get_input_datasets_change_urns() -> List[str]: diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index 116e6cd63dd9f..3d34ea5426c58 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -1,7 +1,6 @@ -import os from typing import List -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph from tests.setup.lineage.ingest_data_job_change import ( get_data_job_change_urns, @@ -16,15 +15,11 @@ ingest_input_datasets_change, ) -SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" -TOKEN = os.getenv("DATAHUB_TOKEN") or "" -EMITTER = DatahubRestEmitter(gms_server=SERVER, token=TOKEN) - -def ingest_time_lineage() -> None: - ingest_input_datasets_change(EMITTER) - ingest_data_job_change(EMITTER) - ingest_dataset_join_change(EMITTER) +def ingest_time_lineage(graph_client: DataHubGraph) -> None: + ingest_input_datasets_change(graph_client) + ingest_data_job_change(graph_client) + ingest_dataset_join_change(graph_client) def get_time_lineage_urns() -> List[str]: diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index d4c16ed3b7a21..a58e73cb92798 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -7,7 +7,7 @@ make_dataset_urn, ) from datahub.emitter.mcp import 
MetadataChangeProposalWrapper -from datahub.emitter.rest_emitter import DatahubRestEmitter +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage from datahub.metadata.com.linkedin.pegasus2avro.mxe import SystemMetadata from datahub.metadata.schema_classes import ( @@ -200,7 +200,7 @@ def create_upstream_mcp( def emit_mcps( - emitter: DatahubRestEmitter, mcps: List[MetadataChangeProposalWrapper] + graph_client: DataHubGraph, mcps: List[MetadataChangeProposalWrapper] ) -> None: for mcp in mcps: - emitter.emit_mcp(mcp) + graph_client.emit_mcp(mcp) diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index f2327a13df6d0..8b6fead789b3f 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -13,7 +13,7 @@ ) from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph +from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import ( EntityTypeInfoClass, PropertyValueClass, @@ -30,7 +30,6 @@ from tests.utils import ( delete_urns, delete_urns_from_file, - get_gms_url, get_sleep_info, ingest_file_via_rest, ) @@ -78,34 +77,22 @@ def create_test_data(filename: str): sleep_sec, sleep_times = get_sleep_info() -@pytest.fixture(scope="module", autouse=False) -def graph() -> DataHubGraph: - graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) - return graph - - -@pytest.fixture(scope="module", autouse=False) -def ingest_cleanup_data(request): +@pytest.fixture(scope="module") +def ingest_cleanup_data(auth_session, graph_client, request): new_file, filename = tempfile.mkstemp() try: create_test_data(filename) print("ingesting structured properties test data") - ingest_file_via_rest(filename) + ingest_file_via_rest(auth_session, filename) yield print("removing structured properties test data") - delete_urns_from_file(filename) - delete_urns(generated_urns) + delete_urns_from_file(graph_client, filename) + delete_urns(graph_client, generated_urns) wait_for_writes_to_sync() finally: os.remove(filename) -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - def create_property_definition( property_name: str, graph: DataHubGraph, @@ -191,16 +178,19 @@ def to_es_name(property_name=None, namespace=default_namespace, qualified_name=N # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_string(ingest_cleanup_data, graph): +def test_structured_property_string(ingest_cleanup_data, graph_client): property_name = f"retention{randint(10, 10000)}Policy" - create_property_definition(property_name, graph) + create_property_definition(property_name, graph_client) - attach_property_to_entity(dataset_urns[0], property_name, ["30d"], graph=graph) + attach_property_to_entity( + dataset_urns[0], property_name, ["30d"], graph=graph_client + ) try: - attach_property_to_entity(dataset_urns[0], property_name, 200030, graph=graph) + attach_property_to_entity( + dataset_urns[0], property_name, 200030, graph=graph_client + ) raise AssertionError( "Should not be able to attach a number to a string property" ) @@ -215,17 +205,18 @@ def test_structured_property_string(ingest_cleanup_data, graph): # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_double(ingest_cleanup_data, graph): +def test_structured_property_double(ingest_cleanup_data, graph_client): property_name = f"expiryTime{randint(10, 10000)}" - create_property_definition(property_name, graph, value_type="number") + create_property_definition(property_name, graph_client, value_type="number") - attach_property_to_entity(dataset_urns[0], property_name, 2000034, graph=graph) + attach_property_to_entity( + dataset_urns[0], property_name, 2000034, graph=graph_client + ) try: attach_property_to_entity( - dataset_urns[0], property_name, "30 days", graph=graph + dataset_urns[0], property_name, "30 days", graph=graph_client ) raise AssertionError( "Should not be able to attach a string to a number property" @@ -238,7 +229,7 @@ def test_structured_property_double(ingest_cleanup_data, graph): try: attach_property_to_entity( - dataset_urns[0], property_name, [2000034, 2000035], graph=graph + dataset_urns[0], property_name, [2000034, 2000035], graph=graph_client ) raise AssertionError("Should not be able to attach a list to a number property") except Exception as e: @@ -252,28 +243,28 @@ def test_structured_property_double(ingest_cleanup_data, graph): # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_double_multiple(ingest_cleanup_data, graph): +def test_structured_property_double_multiple(ingest_cleanup_data, graph_client): property_name = f"versions{randint(10, 10000)}" create_property_definition( - property_name, graph, value_type="number", cardinality="MULTIPLE" + property_name, graph_client, value_type="number", cardinality="MULTIPLE" ) - attach_property_to_entity(dataset_urns[0], property_name, [1.0, 2.0], graph=graph) + attach_property_to_entity( + dataset_urns[0], property_name, [1.0, 2.0], graph=graph_client + ) # @tenacity.retry( # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): +def test_structured_property_string_allowed_values(ingest_cleanup_data, 
graph_client): property_name = f"enumProperty{randint(10, 10000)}" create_property_definition( property_name, - graph, + graph_client, value_type="string", cardinality="MULTIPLE", allowed_values=[ @@ -283,12 +274,12 @@ def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): ) attach_property_to_entity( - dataset_urns[0], property_name, ["foo", "bar"], graph=graph + dataset_urns[0], property_name, ["foo", "bar"], graph=graph_client ) try: attach_property_to_entity( - dataset_urns[0], property_name, ["foo", "baz"], graph=graph + dataset_urns[0], property_name, ["foo", "baz"], graph=graph_client ) raise AssertionError( "Should not be able to attach a value not in allowed values" @@ -300,13 +291,12 @@ def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): raise e -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_definition_evolution(ingest_cleanup_data, graph): +def test_structured_property_definition_evolution(ingest_cleanup_data, graph_client): property_name = f"enumProperty{randint(10, 10000)}" create_property_definition( property_name, - graph, + graph_client, value_type="string", cardinality="MULTIPLE", allowed_values=[ @@ -318,7 +308,7 @@ def test_structured_property_definition_evolution(ingest_cleanup_data, graph): try: create_property_definition( property_name, - graph, + graph_client, value_type="string", cardinality="SINGLE", allowed_values=[ @@ -340,13 +330,12 @@ def test_structured_property_definition_evolution(ingest_cleanup_data, graph): # stop=tenacity.stop_after_attempt(sleep_times), # wait=tenacity.wait_fixed(sleep_sec), # ) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_schema_field(ingest_cleanup_data, graph): +def test_structured_property_schema_field(ingest_cleanup_data, graph_client): property_name = f"deprecationDate{randint(10, 10000)}" create_property_definition( property_name, - graph, + graph_client, namespace="io.datahubproject.test", value_type="date", entity_types=["schemaField"], @@ -356,12 +345,14 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): schema_field_urns[0], property_name, "2020-10-01", - graph=graph, + graph=graph_client, namespace="io.datahubproject.test", ) assert get_property_from_entity( - schema_field_urns[0], f"io.datahubproject.test.{property_name}", graph=graph + schema_field_urns[0], + f"io.datahubproject.test.{property_name}", + graph=graph_client, ) == ["2020-10-01"] try: @@ -369,7 +360,7 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): schema_field_urns[0], property_name, 200030, - graph=graph, + graph=graph_client, namespace="io.datahubproject.test", ) raise AssertionError("Should not be able to attach a number to a DATE property") @@ -380,25 +371,26 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): raise e -def test_dataset_yaml_loader(ingest_cleanup_data, graph): +def test_dataset_yaml_loader(ingest_cleanup_data, graph_client): StructuredProperties.create( - "tests/structured_properties/test_structured_properties.yaml" + "tests/structured_properties/test_structured_properties.yaml", + graph=graph_client, ) for dataset in Dataset.from_yaml("tests/structured_properties/test_dataset.yaml"): for mcp in dataset.generate_mcp(): - graph.emit(mcp) - wait_for_writes_to_sync() + graph_client.emit(mcp) + wait_for_writes_to_sync() property_name = "io.acryl.dataManagement.deprecationDate" assert get_property_from_entity( 
make_schema_field_urn(make_dataset_urn("hive", "user.clicks"), "ip"), property_name, - graph=graph, + graph=graph_client, ) == ["2023-01-01"] dataset = Dataset.from_datahub( - graph=graph, + graph=graph_client, urn="urn:li:dataset:(urn:li:dataPlatform:hive,user.clicks,PROD)", ) field_name = "ip" @@ -416,14 +408,16 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): ] == ["2023-01-01"] -def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): +def test_structured_property_search( + ingest_cleanup_data, graph_client: DataHubGraph, caplog +): # Attach structured property to entity and to field field_property_name = f"deprecationDate{randint(10, 10000)}" create_property_definition( namespace="io.datahubproject.test", property_name=field_property_name, - graph=graph, + graph=graph_client, value_type="date", entity_types=["schemaField"], ) @@ -432,7 +426,7 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca schema_field_urns[0], field_property_name, "2020-10-01", - graph=graph, + graph=graph_client, namespace="io.datahubproject.test", ) dataset_property_name = f"replicationSLA{randint(10, 10000)}" @@ -440,16 +434,16 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca value_type = "number" create_property_definition( - property_name=dataset_property_name, graph=graph, value_type=value_type + property_name=dataset_property_name, graph=graph_client, value_type=value_type ) attach_property_to_entity( - dataset_urns[0], dataset_property_name, [property_value], graph=graph + dataset_urns[0], dataset_property_name, [property_value], graph=graph_client ) # [] = default entities which includes datasets, does not include fields entity_urns = list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( extraFilters=[ { "field": to_es_name(dataset_property_name), @@ -463,7 +457,7 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca assert entity_urns[0] == dataset_urns[0] # Search over schema field specifically - field_structured_prop = graph.get_aspect( + field_structured_prop = graph_client.get_aspect( entity_urn=schema_field_urns[0], aspect_type=StructuredPropertiesClass ) assert field_structured_prop == StructuredPropertiesClass( @@ -477,7 +471,7 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca # Search over entities that do not include the field field_urns = list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( entity_types=["tag"], extraFilters=[ { @@ -494,7 +488,7 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca # OR the two properties together to return both results field_urns = list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( entity_types=["dataset", "tag"], extraFilters=[ { @@ -509,7 +503,7 @@ def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, ca assert dataset_urns[0] in field_urns -def test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog): +def test_dataset_structured_property_patch(ingest_cleanup_data, graph_client, caplog): # Create 1st Property property_name = f"replicationSLA{randint(10, 10000)}" property_value1 = 30.0 @@ -519,7 +513,7 @@ def test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog): create_property_definition( property_name=property_name, - graph=graph, + graph=graph_client, value_type=value_type, cardinality=cardinality, ) @@ -529,7 +523,7 @@ def 
test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog): property_value_other = 200.0 create_property_definition( property_name=property_name_other, - graph=graph, + graph=graph_client, value_type=value_type, cardinality=cardinality, ) @@ -544,14 +538,14 @@ def patch_one(prop_name, prop_value): ) for mcp in dataset_patcher.build(): - graph.emit(mcp) + graph_client.emit(mcp) wait_for_writes_to_sync() # Add 1 value for property 1 patch_one(property_name, property_value1) actual_property_values = get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph_client ) assert actual_property_values == [property_value1] @@ -559,7 +553,9 @@ def patch_one(prop_name, prop_value): patch_one(property_name_other, property_value_other) actual_property_values = get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name_other}", graph=graph + dataset_urns[0], + f"{default_namespace}.{property_name_other}", + graph=graph_client, ) assert actual_property_values == [property_value_other] @@ -568,20 +564,22 @@ def patch_one(prop_name, prop_value): actual_property_values = set( get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph_client ) ) assert actual_property_values == {property_value1, property_value2} # Validate property 2 is the same actual_property_values = get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name_other}", graph=graph + dataset_urns[0], + f"{default_namespace}.{property_name_other}", + graph=graph_client, ) assert actual_property_values == [property_value_other] def test_dataset_structured_property_soft_delete_validation( - ingest_cleanup_data, graph, caplog + ingest_cleanup_data, graph_client, caplog ): property_name = f"softDeleteTest{randint(10, 10000)}Property" value_type = "string" @@ -589,21 +587,23 @@ def test_dataset_structured_property_soft_delete_validation( create_property_definition( property_name=property_name, - graph=graph, + graph=graph_client, value_type=value_type, cardinality="SINGLE", ) - test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + test_property = StructuredProperties.from_datahub( + graph=graph_client, urn=property_urn + ) assert test_property is not None - graph.soft_delete_entity(urn=property_urn) + graph_client.soft_delete_entity(urn=property_urn) # Attempt to modify soft deleted definition try: create_property_definition( property_name=property_name, - graph=graph, + graph=graph_client, value_type=value_type, cardinality="SINGLE", ) @@ -619,7 +619,7 @@ def test_dataset_structured_property_soft_delete_validation( # Attempt to add soft deleted structured property to entity try: attach_property_to_entity( - dataset_urns[0], property_name, "test string", graph=graph + dataset_urns[0], property_name, "test string", graph=graph_client ) raise AssertionError( "Should not be able to apply a soft deleted structured property to another entity" @@ -632,7 +632,7 @@ def test_dataset_structured_property_soft_delete_validation( def test_dataset_structured_property_soft_delete_read_mutation( - ingest_cleanup_data, graph, caplog + ingest_cleanup_data, graph_client, caplog ): property_name = f"softDeleteReadTest{randint(10, 10000)}Property" value_type = "string" @@ -642,33 +642,33 @@ def test_dataset_structured_property_soft_delete_read_mutation( # Create 
property on a dataset create_property_definition( property_name=property_name, - graph=graph, + graph=graph_client, value_type=value_type, cardinality="SINGLE", ) attach_property_to_entity( - dataset_urns[0], property_name, property_value, graph=graph + dataset_urns[0], property_name, property_value, graph=graph_client ) # Make sure it exists on the dataset actual_property_values = get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph_client ) assert actual_property_values == [property_value] # Soft delete the structured property - graph.soft_delete_entity(urn=property_urn) + graph_client.soft_delete_entity(urn=property_urn) wait_for_writes_to_sync() # Make sure it is no longer returned on the dataset actual_property_values = get_property_from_entity( - dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph + dataset_urns[0], f"{default_namespace}.{property_name}", graph=graph_client ) assert actual_property_values is None def test_dataset_structured_property_soft_delete_search_filter_validation( - ingest_cleanup_data, graph, caplog + ingest_cleanup_data, graph_client, caplog ): # Create a test structured property dataset_property_name = f"softDeleteSearchFilter{randint(10, 10000)}" @@ -679,15 +679,15 @@ def test_dataset_structured_property_soft_delete_search_filter_validation( ) create_property_definition( - property_name=dataset_property_name, graph=graph, value_type=value_type + property_name=dataset_property_name, graph=graph_client, value_type=value_type ) attach_property_to_entity( - dataset_urns[0], dataset_property_name, [property_value], graph=graph + dataset_urns[0], dataset_property_name, [property_value], graph=graph_client ) # Perform search, make sure it works entity_urns = list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( extraFilters=[ { "field": to_es_name(dataset_property_name), @@ -701,13 +701,13 @@ def test_dataset_structured_property_soft_delete_search_filter_validation( assert entity_urns[0] == dataset_urns[0] # Soft delete the structured property - graph.soft_delete_entity(urn=property_urn) + graph_client.soft_delete_entity(urn=property_urn) wait_for_writes_to_sync() # Perform search, make sure it validates filter and rejects as invalid request try: list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( extraFilters=[ { "field": to_es_name(dataset_property_name), @@ -727,7 +727,7 @@ def test_dataset_structured_property_soft_delete_search_filter_validation( raise e -def test_dataset_structured_property_delete(ingest_cleanup_data, graph, caplog): +def test_dataset_structured_property_delete(ingest_cleanup_data, graph_client, caplog): # Create property, assign value to target dataset urn def create_property(target_dataset, prop_value): property_name = f"hardDeleteTest{randint(10, 10000)}Property" @@ -736,12 +736,14 @@ def create_property(target_dataset, prop_value): create_property_definition( property_name=property_name, - graph=graph, + graph=graph_client, value_type=value_type, cardinality="SINGLE", ) - test_property = StructuredProperties.from_datahub(graph=graph, urn=property_urn) + test_property = StructuredProperties.from_datahub( + graph=graph_client, urn=property_urn + ) assert test_property is not None # assign @@ -751,7 +753,7 @@ def create_property(target_dataset, prop_value): prop_value, ) for mcp in dataset_patcher.build(): - graph.emit(mcp) + graph_client.emit(mcp) return test_property @@ -764,17 
+766,17 @@ def create_property(target_dataset, prop_value): assert get_property_from_entity( dataset_urns[0], property1.qualified_name, - graph=graph, + graph=graph_client, ) == ["foo"] assert get_property_from_entity( dataset_urns[0], property2.qualified_name, - graph=graph, + graph=graph_client, ) == ["bar"] def validate_search(qualified_name, expected): entity_urns = list( - graph.get_urns_by_filter( + graph_client.get_urns_by_filter( extraFilters=[ { "field": to_es_name(qualified_name=qualified_name), @@ -791,7 +793,7 @@ def validate_search(qualified_name, expected): validate_search(property2.qualified_name, expected=[dataset_urns[0]]) # delete the structured property #1 - graph.hard_delete_entity(urn=property1.urn) + graph_client.hard_delete_entity(urn=property1.urn) wait_for_writes_to_sync() # validate property #1 deleted and property #2 remains @@ -799,18 +801,18 @@ def validate_search(qualified_name, expected): get_property_from_entity( dataset_urns[0], property1.qualified_name, - graph=graph, + graph=graph_client, ) is None ) assert get_property_from_entity( dataset_urns[0], property2.qualified_name, - graph=graph, + graph=graph_client, ) == ["bar"] # assert property 1 definition was removed - property1_definition = graph.get_aspect( + property1_definition = graph_client.get_aspect( property1.urn, StructuredPropertyDefinitionClass ) assert property1_definition is None diff --git a/smoke-test/tests/tags_and_terms/tags_and_terms_test.py b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py index 34404a1ddff59..49bfdbc4f939c 100644 --- a/smoke-test/tests/tags_and_terms/tags_and_terms_test.py +++ b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py @@ -1,36 +1,18 @@ import pytest -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - ingest_file_via_rest, - wait_for_healthcheck_util, -) +from tests.utils import delete_urns_from_file, ingest_file_via_rest @pytest.fixture(scope="module", autouse=True) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting test data") - ingest_file_via_rest("tests/tags_and_terms/data.json") + ingest_file_via_rest(auth_session, "tests/tags_and_terms/data.json") yield print("removing test data") - delete_urns_from_file("tests/tags_and_terms/data.json") + delete_urns_from_file(graph_client, "tests/tags_and_terms/data.json") -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
- pass - - -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_add_tag(frontend_session): +def test_add_tag(auth_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -54,8 +36,8 @@ def test_add_tag(frontend_session): } # Fetch tags - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -77,8 +59,8 @@ def test_add_tag(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json ) response.raise_for_status() res_data = response.json() @@ -88,8 +70,8 @@ def test_add_tag(frontend_session): assert res_data["data"]["addTag"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -121,8 +103,8 @@ def test_add_tag(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=remove_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json ) response.raise_for_status() res_data = response.json() @@ -134,8 +116,8 @@ def test_add_tag(frontend_session): assert res_data["data"]["removeTag"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -146,8 +128,7 @@ def test_add_tag(frontend_session): assert res_data["data"]["dataset"]["globalTags"] == {"tags": []} -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_add_tag_to_chart(frontend_session): +def test_add_tag_to_chart(auth_session): chart_urn = "urn:li:chart:(looker,test-tags-terms-sample-chart)" chart_json = { @@ -168,8 +149,8 @@ def test_add_tag_to_chart(frontend_session): } # Fetch tags - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=chart_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json ) response.raise_for_status() res_data = response.json() @@ -191,8 +172,8 @@ def test_add_tag_to_chart(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json ) response.raise_for_status() res_data = response.json() @@ -202,8 +183,8 @@ def test_add_tag_to_chart(frontend_session): assert res_data["data"]["addTag"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=chart_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json ) response.raise_for_status() res_data = response.json() @@ -235,8 +216,8 @@ def test_add_tag_to_chart(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=remove_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", 
json=remove_json ) response.raise_for_status() res_data = response.json() @@ -246,8 +227,8 @@ def test_add_tag_to_chart(frontend_session): assert res_data["data"]["removeTag"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=chart_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=chart_json ) response.raise_for_status() res_data = response.json() @@ -258,8 +239,7 @@ def test_add_tag_to_chart(frontend_session): assert res_data["data"]["chart"]["globalTags"] == {"tags": []} -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_add_term(frontend_session): +def test_add_term(auth_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -282,8 +262,8 @@ def test_add_term(frontend_session): } # Fetch the terms - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -305,8 +285,8 @@ def test_add_term(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json ) response.raise_for_status() res_data = response.json() @@ -318,8 +298,8 @@ def test_add_term(frontend_session): assert res_data["data"]["addTerm"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -350,8 +330,8 @@ def test_add_term(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=remove_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json ) response.raise_for_status() res_data = response.json() @@ -362,8 +342,8 @@ def test_add_term(frontend_session): assert res_data["data"] assert res_data["data"]["removeTerm"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_json ) response.raise_for_status() res_data = response.json() @@ -374,8 +354,7 @@ def test_add_term(frontend_session): assert res_data["data"]["dataset"]["glossaryTerms"] == {"terms": []} -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_update_schemafield(frontend_session): +def test_update_schemafield(auth_session): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-tags-terms-sample-kafka" env = "PROD" @@ -454,8 +433,8 @@ def test_update_schemafield(frontend_session): } # dataset schema tags - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags ) response.raise_for_status() res_data = response.json() @@ -479,8 +458,8 @@ def test_update_schemafield(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json ) 
response.raise_for_status() res_data = response.json() @@ -490,8 +469,8 @@ def test_update_schemafield(frontend_session): assert res_data["data"]["addTag"] is True # Refetch the dataset schema - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags ) response.raise_for_status() res_data = response.json() @@ -531,8 +510,8 @@ def test_update_schemafield(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=remove_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json ) response.raise_for_status() res_data = response.json() @@ -544,8 +523,8 @@ def test_update_schemafield(frontend_session): assert res_data["data"]["removeTag"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags ) response.raise_for_status() res_data = response.json() @@ -571,8 +550,8 @@ def test_update_schemafield(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=add_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=add_json ) response.raise_for_status() res_data = response.json() @@ -582,8 +561,8 @@ def test_update_schemafield(frontend_session): assert res_data["data"]["addTerm"] is True # Refetch the dataset schema - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms ) response.raise_for_status() res_data = response.json() @@ -622,8 +601,8 @@ def test_update_schemafield(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=remove_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=remove_json ) response.raise_for_status() res_data = response.json() @@ -633,8 +612,8 @@ def test_update_schemafield(frontend_session): assert res_data["data"]["removeTerm"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_terms ) response.raise_for_status() res_data = response.json() @@ -647,8 +626,8 @@ def test_update_schemafield(frontend_session): } # dataset schema tags - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=dataset_schema_json_tags ) response.raise_for_status() res_data = response.json() @@ -668,8 +647,9 @@ def test_update_schemafield(frontend_session): } # fetch no description - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_description + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", + json=dataset_schema_json_description, ) response.raise_for_status() res_data = response.json() @@ -681,8 +661,8 @@ def test_update_schemafield(frontend_session): "editableSchemaFieldInfo": [{"description": 
None}] } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_description_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_description_json ) response.raise_for_status() res_data = response.json() @@ -692,8 +672,9 @@ def test_update_schemafield(frontend_session): assert res_data["data"]["updateDescription"] is True # Refetch the dataset - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_schema_json_description + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", + json=dataset_schema_json_description, ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py index 96f2fa69014cf..9fb1fcbb7fbf1 100644 --- a/smoke-test/tests/telemetry/telemetry_test.py +++ b/smoke-test/tests/telemetry/telemetry_test.py @@ -1,21 +1,18 @@ import json from datahub.cli.cli_utils import get_aspects_for_entity -from datahub.ingestion.graph.client import get_default_graph -def test_no_client_id(): +def test_no_client_id(graph_client): client_id_urn = "urn:li:telemetry:clientId" aspect = [ "clientId" ] # this is checking for the removal of the invalid aspect RemoveClientIdAspectStep.java - client = get_default_graph() - res_data = json.dumps( get_aspects_for_entity( - session=client._session, - gms_host=client.config.server, + session=graph_client._session, + gms_host=graph_client.config.server, entity_urn=client_id_urn, aspects=aspect, typed=False, @@ -24,16 +21,14 @@ def test_no_client_id(): assert res_data == "{}" -def test_no_telemetry_client_id(): +def test_no_telemetry_client_id(graph_client): client_id_urn = "urn:li:telemetry:clientId" aspect = ["telemetryClientId"] # telemetry expected to be disabled for tests - client = get_default_graph() - res_data = json.dumps( get_aspects_for_entity( - session=client._session, - gms_host=client.config.server, + session=graph_client._session, + gms_host=graph_client.config.server, entity_urn=client_id_urn, aspects=aspect, typed=False, diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py index 5eac25059ec62..c0df51dd9d98e 100644 --- a/smoke-test/tests/test_stateful_ingestion.py +++ b/smoke-test/tests/test_stateful_ingestion.py @@ -11,15 +11,10 @@ from sqlalchemy import create_engine from sqlalchemy.sql import text -from tests.utils import ( - get_gms_url, - get_mysql_password, - get_mysql_url, - get_mysql_username, -) +from tests.utils import get_mysql_password, get_mysql_url, get_mysql_username -def test_stateful_ingestion(wait_for_healthchecks): +def test_stateful_ingestion(auth_session): def create_mysql_engine(mysql_source_config_dict: Dict[str, Any]) -> Any: mysql_config = MySQLConfig.parse_obj(mysql_source_config_dict) url = mysql_config.get_sql_alchemy_url() @@ -50,6 +45,7 @@ def validate_all_providers_have_committed_successfully(pipeline: Pipeline) -> No assert provider_count == 1 def get_current_checkpoint_from_pipeline( + auth_session, pipeline: Pipeline, ) -> Optional[Checkpoint[GenericCheckpointState]]: # TODO: Refactor to use the helper method in the metadata-ingestion tests, instead of copying it here. 
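# Hedged illustration (editor's note; server URL and token are placeholders): the hunk below
# changes the test's datahub-rest sink to pass a token alongside the server URL. Outside the
# fixture plumbing, the same authenticated sink looks like this with the Pipeline API already
# used in this patch:
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {"type": "file", "config": {"filename": "tests/tests/data.json"}},
        "sink": {
            "type": "datahub-rest",
            "config": {
                "server": "http://localhost:8080",  # assumed GMS endpoint
                "token": "<personal-access-token>",  # e.g. auth_session.gms_token()
            },
        },
    }
)
pipeline.run()
pipeline.raise_from_status()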
@@ -71,7 +67,7 @@ def get_current_checkpoint_from_pipeline( "fail_safe_threshold": 100.0, "state_provider": { "type": "datahub", - "config": {"datahub_api": {"server": get_gms_url()}}, + "config": {"datahub_api": {"server": auth_session.gms_url()}}, }, }, } @@ -83,7 +79,10 @@ def get_current_checkpoint_from_pipeline( }, "sink": { "type": "datahub-rest", - "config": {"server": get_gms_url()}, + "config": { + "server": auth_session.gms_url(), + "token": auth_session.gms_token(), + }, }, "pipeline_name": "mysql_stateful_ingestion_smoke_test_pipeline", "reporting": [ @@ -108,14 +107,14 @@ def get_current_checkpoint_from_pipeline( # 3. Do the first run of the pipeline and get the default job's checkpoint. pipeline_run1 = run_and_get_pipeline(pipeline_config_dict) - checkpoint1 = get_current_checkpoint_from_pipeline(pipeline_run1) + checkpoint1 = get_current_checkpoint_from_pipeline(auth_session, pipeline_run1) assert checkpoint1 assert checkpoint1.state # 4. Drop table t1 created during step 2 + rerun the pipeline and get the checkpoint state. drop_table(mysql_engine, table_names[0]) pipeline_run2 = run_and_get_pipeline(pipeline_config_dict) - checkpoint2 = get_current_checkpoint_from_pipeline(pipeline_run2) + checkpoint2 = get_current_checkpoint_from_pipeline(auth_session, pipeline_run2) assert checkpoint2 assert checkpoint2.state diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py index 28005c8397d0d..bc9ebe46c5279 100644 --- a/smoke-test/tests/tests/tests_test.py +++ b/smoke-test/tests/tests/tests_test.py @@ -1,36 +1,18 @@ import pytest import tenacity -from tests.utils import ( - delete_urns_from_file, - get_frontend_url, - get_sleep_info, - ingest_file_via_rest, - wait_for_healthcheck_util, -) +from tests.utils import delete_urns_from_file, get_sleep_info, ingest_file_via_rest sleep_sec, sleep_times = get_sleep_info() @pytest.fixture(scope="module", autouse=True) -def ingest_cleanup_data(request): +def ingest_cleanup_data(auth_session, graph_client, request): print("ingesting test data") - ingest_file_via_rest("tests/tests/data.json") + ingest_file_via_rest(auth_session, "tests/tests/data.json") yield print("removing test data") - delete_urns_from_file("tests/tests/data.json") - - -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass + delete_urns_from_file(graph_client, "tests/tests/data.json") test_id = "test id" @@ -40,7 +22,7 @@ def test_healthchecks(wait_for_healthchecks): test_description = "test description" -def create_test(frontend_session): +def create_test(auth_session): # Create new Test create_test_json = { "query": """mutation createTest($input: CreateTestInput!) {\n @@ -57,8 +39,8 @@ def create_test(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=create_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_test_json ) response.raise_for_status() res_data = response.json() @@ -71,7 +53,7 @@ def create_test(frontend_session): return res_data["data"]["createTest"] -def delete_test(frontend_session, test_urn): +def delete_test(auth_session, test_urn): delete_test_json = { "query": """mutation deleteTest($urn: String!) 
{\n deleteTest(urn: $urn) @@ -79,15 +61,15 @@ def delete_test(frontend_session, test_urn): "variables": {"urn": test_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=delete_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_test_json ) response.raise_for_status() -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_test(frontend_session, wait_for_healthchecks): - test_urn = create_test(frontend_session) +@pytest.mark.dependency() +def test_create_test(auth_session): + test_urn = create_test(auth_session) # Get the test get_test_json = { @@ -104,8 +86,8 @@ def test_create_test(frontend_session, wait_for_healthchecks): }""", "variables": {"urn": test_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json ) response.raise_for_status() res_data = response.json() @@ -124,11 +106,11 @@ def test_create_test(frontend_session, wait_for_healthchecks): assert "errors" not in res_data # Delete test - delete_test(frontend_session, test_urn) + delete_test(auth_session, test_urn) # Ensure the test no longer exists - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json ) response.raise_for_status() res_data = response.json() @@ -137,9 +119,9 @@ def test_create_test(frontend_session, wait_for_healthchecks): assert "errors" not in res_data -@pytest.mark.dependency(depends=["test_healthchecks", "test_create_test"]) -def test_update_test(frontend_session, wait_for_healthchecks): - test_urn = create_test(frontend_session) +@pytest.mark.dependency(depends=["test_create_test"]) +def test_update_test(auth_session): + test_urn = create_test(auth_session) test_name = "new name" test_category = "new category" test_description = "new description" @@ -161,8 +143,8 @@ def test_update_test(frontend_session, wait_for_healthchecks): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_test_json ) response.raise_for_status() res_data = response.json() @@ -187,8 +169,8 @@ def test_update_test(frontend_session, wait_for_healthchecks): }""", "variables": {"urn": test_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=get_test_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=get_test_json ) response.raise_for_status() res_data = response.json() @@ -206,13 +188,13 @@ def test_update_test(frontend_session, wait_for_healthchecks): } assert "errors" not in res_data - delete_test(frontend_session, test_urn) + delete_test(auth_session, test_urn) @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def test_list_tests_retries(frontend_session): +def test_list_tests_retries(auth_session): list_tests_json = { "query": """query listTests($input: ListTestsInput!) 
{\n listTests(input: $input) {\n @@ -227,8 +209,8 @@ def test_list_tests_retries(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_tests_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_tests_json ) response.raise_for_status() res_data = response.json() @@ -240,13 +222,12 @@ def test_list_tests_retries(frontend_session): assert "errors" not in res_data -@pytest.mark.dependency(depends=["test_healthchecks", "test_update_test"]) -def test_list_tests(frontend_session, wait_for_healthchecks): - test_list_tests_retries(frontend_session) +@pytest.mark.dependency(depends=["test_update_test"]) +def test_list_tests(auth_session): + test_list_tests_retries(auth_session) -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_get_test_results(frontend_session, wait_for_healthchecks): +def test_get_test_results(auth_session): urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tests-sample,PROD)" # Test urn ) @@ -272,7 +253,9 @@ def test_get_test_results(frontend_session, wait_for_healthchecks): }""", "variables": {"urn": urn}, } - response = frontend_session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=json + ) response.raise_for_status() res_data = response.json() diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py index 4573514f7806c..d33ad3b24ceb5 100644 --- a/smoke-test/tests/timeline/timeline_test.py +++ b/smoke-test/tests/timeline/timeline_test.py @@ -4,20 +4,20 @@ from datahub.cli import timeline_cli from datahub.cli.cli_utils import guess_entity_type, post_entity -from tests.utils import get_datahub_graph, ingest_file_via_rest, wait_for_writes_to_sync +from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync pytestmark = pytest.mark.no_cypress_suite1 -def test_all(): +def test_all(auth_session, graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - ingest_file_via_rest("tests/timeline/timeline_test_data.json") - ingest_file_via_rest("tests/timeline/timeline_test_datav2.json") - ingest_file_via_rest("tests/timeline/timeline_test_datav3.json") + ingest_file_via_rest(auth_session, "tests/timeline/timeline_test_data.json") + ingest_file_via_rest(auth_session, "tests/timeline/timeline_test_datav2.json") + ingest_file_via_rest(auth_session, "tests/timeline/timeline_test_datav3.json") res_data = timeline_cli.get_timeline( dataset_urn, @@ -25,8 +25,9 @@ def test_all(): None, None, False, + graph=graph_client, ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 @@ -39,21 +40,36 @@ def test_all(): assert res_data[2]["semVer"] == "2.0.0-computed" -def test_schema(): +def test_schema(graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschema.json") - put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav2.json") - put(dataset_urn, "schemaMetadata", "test_resources/timeline/newschemav3.json") + put( + graph_client, + dataset_urn, + "schemaMetadata", + 
"test_resources/timeline/newschema.json", + ) + put( + graph_client, + dataset_urn, + "schemaMetadata", + "test_resources/timeline/newschemav2.json", + ) + put( + graph_client, + dataset_urn, + "schemaMetadata", + "test_resources/timeline/newschemav3.json", + ) res_data = timeline_cli.get_timeline( - dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False + dataset_urn, ["TECHNICAL_SCHEMA"], None, None, False, graph=graph_client ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 assert res_data[0]["semVerChange"] == "MINOR" @@ -65,21 +81,36 @@ def test_schema(): assert res_data[2]["semVer"] == "2.0.0-computed" -def test_glossary(): +def test_glossary(graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossary.json") - put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv2.json") - put(dataset_urn, "glossaryTerms", "test_resources/timeline/newglossaryv3.json") + put( + graph_client, + dataset_urn, + "glossaryTerms", + "test_resources/timeline/newglossary.json", + ) + put( + graph_client, + dataset_urn, + "glossaryTerms", + "test_resources/timeline/newglossaryv2.json", + ) + put( + graph_client, + dataset_urn, + "glossaryTerms", + "test_resources/timeline/newglossaryv3.json", + ) res_data = timeline_cli.get_timeline( - dataset_urn, ["GLOSSARY_TERM"], None, None, False + dataset_urn, ["GLOSSARY_TERM"], None, None, False, graph=graph_client ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 assert res_data[0]["semVerChange"] == "MINOR" @@ -91,33 +122,36 @@ def test_glossary(): assert res_data[2]["semVer"] == "0.2.0-computed" -def test_documentation(): +def test_documentation(graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" put( + graph_client, dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentation.json", ) put( + graph_client, dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv2.json", ) put( + graph_client, dataset_urn, "institutionalMemory", "test_resources/timeline/newdocumentationv3.json", ) res_data = timeline_cli.get_timeline( - dataset_urn, ["DOCUMENTATION"], None, None, False + dataset_urn, ["DOCUMENTATION"], None, None, False, graph=graph_client ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 assert res_data[0]["semVerChange"] == "MINOR" @@ -129,19 +163,31 @@ def test_documentation(): assert res_data[2]["semVer"] == "0.2.0-computed" -def test_tags(): +def test_tags(graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "globalTags", "test_resources/timeline/newtags.json") - put(dataset_urn, "globalTags", "test_resources/timeline/newtagsv2.json") - put(dataset_urn, "globalTags", "test_resources/timeline/newtagsv3.json") + put(graph_client, dataset_urn, "globalTags", "test_resources/timeline/newtags.json") + put( + graph_client, + dataset_urn, + "globalTags", + 
"test_resources/timeline/newtagsv2.json", + ) + put( + graph_client, + dataset_urn, + "globalTags", + "test_resources/timeline/newtagsv3.json", + ) - res_data = timeline_cli.get_timeline(dataset_urn, ["TAG"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["TAG"], None, None, False, graph=graph_client + ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 assert res_data[0]["semVerChange"] == "MINOR" @@ -153,19 +199,36 @@ def test_tags(): assert res_data[2]["semVer"] == "0.2.0-computed" -def test_ownership(): +def test_ownership(graph_client): platform = "urn:li:dataPlatform:kafka" dataset_name = "test-timeline-sample-kafka" env = "PROD" dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})" - put(dataset_urn, "ownership", "test_resources/timeline/newownership.json") - put(dataset_urn, "ownership", "test_resources/timeline/newownershipv2.json") - put(dataset_urn, "ownership", "test_resources/timeline/newownershipv3.json") + put( + graph_client, + dataset_urn, + "ownership", + "test_resources/timeline/newownership.json", + ) + put( + graph_client, + dataset_urn, + "ownership", + "test_resources/timeline/newownershipv2.json", + ) + put( + graph_client, + dataset_urn, + "ownership", + "test_resources/timeline/newownershipv3.json", + ) - res_data = timeline_cli.get_timeline(dataset_urn, ["OWNER"], None, None, False) + res_data = timeline_cli.get_timeline( + dataset_urn, ["OWNER"], None, None, False, graph=graph_client + ) - get_datahub_graph().hard_delete_entity(urn=dataset_urn) + graph_client.hard_delete_entity(urn=dataset_urn) assert res_data assert len(res_data) == 3 assert res_data[0]["semVerChange"] == "MINOR" @@ -177,9 +240,9 @@ def test_ownership(): assert res_data[2]["semVer"] == "0.2.0-computed" -def put(urn: str, aspect: str, aspect_data: str) -> None: +def put(graph_client, urn: str, aspect: str, aspect_data: str) -> None: """Update a single aspect of an entity""" - client = get_datahub_graph() + client = graph_client entity_type = guess_entity_type(urn) with open(aspect_data) as fp: aspect_obj = json.load(fp) diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py index 7447cba60b9bb..0ae18c32662c5 100644 --- a/smoke-test/tests/tokens/revokable_access_token_test.py +++ b/smoke-test/tests/tokens/revokable_access_token_test.py @@ -6,7 +6,6 @@ get_admin_credentials, get_frontend_url, login_as, - wait_for_healthcheck_util, wait_for_writes_to_sync, ) @@ -18,20 +17,21 @@ (admin_user, admin_pass) = get_admin_credentials() -@pytest.fixture(scope="session") -def wait_for_healthchecks(): - wait_for_healthcheck_util() - yield - - @pytest.fixture(autouse=True) -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. 
+def setup(auth_session): wait_for_writes_to_sync() -@pytest.mark.dependency(depends=["test_healthchecks"]) +@pytest.fixture() +def auth_exclude_filter(): + return { + "field": "name", + "condition": "EQUAL", + "negated": True, + "values": ["Test Session Token"], + } + + @pytest.fixture(scope="class", autouse=True) def custom_user_setup(): """Fixture to execute setup before and tear down after all tests are run""" @@ -108,13 +108,12 @@ def custom_user_setup(): assert {"username": "user"} not in res_data["data"]["listUsers"]["users"] -@pytest.mark.dependency(depends=["test_healthchecks"]) @pytest.fixture(autouse=True) -def access_token_setup(): +def access_token_setup(auth_session, auth_exclude_filter): """Fixture to execute asserts before and after a test is run""" admin_session = login_as(admin_user, admin_pass) - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] == 0 @@ -123,21 +122,17 @@ def access_token_setup(): yield # Clean up - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) for metadata in res_data["data"]["listAccessTokens"]["tokens"]: revokeAccessToken(admin_session, metadata["id"]) - # Sleep for eventual consistency - wait_for_writes_to_sync() - -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): +def test_admin_can_create_list_and_revoke_tokens(auth_exclude_filter): admin_session = login_as(admin_user, admin_pass) admin_user_urn = f"urn:li:corpuser:{admin_user}" # Using a super account, there should be no tokens - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None @@ -165,7 +160,7 @@ def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): assert res_data["data"]["getAccessTokenMetadata"]["actorUrn"] == admin_user_urn # Using a super account, list the previously created token. 
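# Editor's aside (an inference from this patch, not stated in it): the new auth_exclude_filter
# appears to exist because TestSessionWrapper (added to tests/utils.py later in this patch)
# creates a personal access token named "Test Session Token" for the whole test session; without
# the negated name filter, the token-count assertions in these tests would see one extra token.
# A hedged sketch of combining it with another filter, using this module's helpers:
owner_filter = {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}
res_data = listAccessTokens(admin_session, filters=[owner_filter, auth_exclude_filter])
assert res_data["data"]["listAccessTokens"]["total"] is not None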
- res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None @@ -183,23 +178,20 @@ def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks): assert res_data["data"] assert res_data["data"]["revokeAccessToken"] assert res_data["data"]["revokeAccessToken"] is True - # Sleep for eventual consistency - wait_for_writes_to_sync() # Using a super account, there should be no tokens - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks): +def test_admin_can_create_and_revoke_tokens_for_other_user(auth_exclude_filter): admin_session = login_as(admin_user, admin_pass) # Using a super account, there should be no tokens - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None @@ -220,7 +212,7 @@ def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks wait_for_writes_to_sync() # Using a super account, list the previously created tokens. - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None @@ -240,19 +232,16 @@ def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks assert res_data["data"] assert res_data["data"]["revokeAccessToken"] assert res_data["data"]["revokeAccessToken"] is True - # Sleep for eventual consistency - wait_for_writes_to_sync() # Using a super account, there should be no tokens - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): +def test_non_admin_can_create_list_revoke_tokens(auth_exclude_filter): user_session = login_as("user", "user") # Normal user should be able to generate token for himself. @@ -271,7 +260,11 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): # User should be able to list his own token res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] + user_session, + [ + {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}, + auth_exclude_filter, + ], ) assert res_data assert res_data["data"] @@ -293,12 +286,14 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): assert res_data["data"] assert res_data["data"]["revokeAccessToken"] assert res_data["data"]["revokeAccessToken"] is True - # Sleep for eventual consistency - wait_for_writes_to_sync() # Using a normal account, check that all its tokens where removed. 
res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] + user_session, + [ + {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}, + auth_exclude_filter, + ], ) assert res_data assert res_data["data"] @@ -306,12 +301,11 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks): assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): +def test_admin_can_manage_tokens_generated_by_other_user(auth_exclude_filter): admin_session = login_as(admin_user, admin_pass) # Using a super account, there should be no tokens - res_data = listAccessTokens(admin_session) + res_data = listAccessTokens(admin_session, filters=[auth_exclude_filter]) assert res_data assert res_data["data"] assert res_data["data"]["listAccessTokens"]["total"] is not None @@ -340,7 +334,11 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): user_session.cookies.clear() admin_session = login_as(admin_user, admin_pass) res_data = listAccessTokens( - admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] + admin_session, + [ + {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}, + auth_exclude_filter, + ], ) assert res_data assert res_data["data"] @@ -364,14 +362,16 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): assert res_data["data"] assert res_data["data"]["revokeAccessToken"] assert res_data["data"]["revokeAccessToken"] is True - # Sleep for eventual consistency - wait_for_writes_to_sync() # Using a normal account, check that all its tokens where removed. user_session.cookies.clear() user_session = login_as("user", "user") res_data = listAccessTokens( - user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] + user_session, + [ + {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}, + auth_exclude_filter, + ], ) assert res_data assert res_data["data"] @@ -381,7 +381,11 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): # Using the super account, check that all tokens where removed. 
admin_session = login_as(admin_user, admin_pass) res_data = listAccessTokens( - admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}] + admin_session, + [ + {"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}, + auth_exclude_filter, + ], ) assert res_data assert res_data["data"] @@ -389,8 +393,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks): assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_non_admin_can_not_generate_tokens_for_others(wait_for_healthchecks): +def test_non_admin_can_not_generate_tokens_for_others(): user_session = login_as("user", "user") # Normal user should not be able to generate token for another user res_data = generateAccessToken_v2(user_session, f"urn:li:corpuser:{admin_user}") @@ -434,10 +437,7 @@ def generateAccessToken_v2(session, actorUrn): def listAccessTokens(session, filters=[]): # Get count of existing tokens - input = { - "start": "0", - "count": "20", - } + input = {"start": 0, "count": 20} if filters: input["filters"] = filters diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py index 7564f1a05e79e..2ddf233f5029a 100644 --- a/smoke-test/tests/utils.py +++ b/smoke-test/tests/utils.py @@ -1,16 +1,15 @@ -import functools import json import logging import os from datetime import datetime, timedelta, timezone from typing import Any, Dict, List, Tuple +import requests from datahub.cli import cli_utils, env_utils -from datahub.ingestion.graph.client import DataHubGraph, get_default_graph from datahub.ingestion.run.pipeline import Pipeline from joblib import Parallel, delayed +from requests.structures import CaseInsensitiveDict -import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync TIME: int = 1581407189000 @@ -83,14 +82,14 @@ def is_k8s_enabled(): return os.getenv("K8S_CLUSTER_ENABLED", "false").lower() in ["true", "yes"] -def wait_for_healthcheck_util(): - assert not check_endpoint(f"{get_frontend_url()}/admin") - assert not check_endpoint(f"{get_gms_url()}/health") +def wait_for_healthcheck_util(auth_session): + assert not check_endpoint(auth_session, f"{get_frontend_url()}/admin") + assert not check_endpoint(auth_session, f"{get_gms_url()}/health") -def check_endpoint(url): +def check_endpoint(auth_session, url): try: - get = requests.get(url) + get = auth_session.get(url) if get.status_code == 200: return else: @@ -99,7 +98,7 @@ def check_endpoint(url): raise SystemExit(f"{url}: is Not reachable \nErr: {e}") -def ingest_file_via_rest(filename: str) -> Pipeline: +def ingest_file_via_rest(auth_session, filename: str) -> Pipeline: pipeline = Pipeline.create( { "source": { @@ -108,7 +107,10 @@ def ingest_file_via_rest(filename: str) -> Pipeline: }, "sink": { "type": "datahub-rest", - "config": {"server": get_gms_url()}, + "config": { + "server": auth_session.gms_url(), + "token": auth_session.gms_token(), + }, }, } ) @@ -118,31 +120,21 @@ def ingest_file_via_rest(filename: str) -> Pipeline: return pipeline -@functools.lru_cache(maxsize=1) -def get_datahub_graph() -> DataHubGraph: - return get_default_graph() +def delete_urn(graph_client, urn: str) -> None: + graph_client.hard_delete_entity(urn) -def delete_urn(urn: str) -> None: - get_datahub_graph().hard_delete_entity(urn) - - -def delete_urns(urns: List[str]) -> None: +def delete_urns(graph_client, urns: List[str]) -> None: for urn in urns: - delete_urn(urn) + delete_urn(graph_client, urn) -def 
delete_urns_from_file(filename: str, shared_data: bool = False) -> None: +def delete_urns_from_file( + graph_client, filename: str, shared_data: bool = False +) -> None: if not env_utils.get_boolean_env_variable("CLEANUP_DATA", True): print("Not cleaning data to save time") return - session = requests.Session() - session.headers.update( - { - "X-RestLi-Protocol-Version": "2.0.0", - "Content-Type": "application/json", - } - ) def delete(entry): is_mcp = "entityUrn" in entry @@ -154,7 +146,7 @@ def delete(entry): snapshot_union = entry["proposedSnapshot"] snapshot = list(snapshot_union.values())[0] urn = snapshot["urn"] - delete_urn(urn) + delete_urn(graph_client, urn) with open(filename) as f: d = json.load(f) @@ -221,3 +213,108 @@ def create_datahub_step_state_aspects( ] with open(onboarding_filename, "w") as f: json.dump(aspects_dict, f, indent=2) + + +class TestSessionWrapper: + """ + Many of the tests do not consider async writes. This + class intercepts mutations using the requests library + to simulate sync requests. + """ + + def __init__(self, requests_session): + self._upstream = requests_session + self._frontend_url = get_frontend_url() + self._gms_url = get_gms_url() + self._gms_token_id, self._gms_token = self._generate_gms_token() + + def __getattr__(self, name): + # Intercept method calls + attr = getattr(self._upstream, name) + + if callable(attr): + + def wrapper(*args, **kwargs): + # Pre-processing can be done here + if name in ("get", "head", "post", "put", "delete", "option", "patch"): + if "headers" not in kwargs: + kwargs["headers"] = CaseInsensitiveDict() + kwargs["headers"].update( + {"Authorization": f"Bearer {self._gms_token}"} + ) + + result = attr(*args, **kwargs) + + # Post-processing can be done here + if name in ("post", "put"): + # Wait for sync if writing + # delete is excluded for efficient test clean-up + self._wait(*args, **kwargs) + + return result + + return wrapper + + return attr + + def gms_token(self): + return self._gms_token + + def gms_token_id(self): + return self._gms_token_id + + def frontend_url(self): + return self._frontend_url + + def gms_url(self): + return self._gms_url + + def _wait(self, *args, **kwargs): + if "/logIn" not in args[0]: + print("TestSessionWrapper sync wait.") + wait_for_writes_to_sync() + + def _generate_gms_token(self): + actor_urn = self._upstream.cookies["actor"] + json = { + "query": """mutation createAccessToken($input: CreateAccessTokenInput!) { + createAccessToken(input: $input) { + accessToken + metadata { + id + } + } + }""", + "variables": { + "input": { + "type": "PERSONAL", + "actorUrn": actor_urn, + "duration": "ONE_DAY", + "name": "Test Session Token", + "description": "Token generated for smoke-tests", + } + }, + } + + response = self._upstream.post( + f"{self._frontend_url}/api/v2/graphql", json=json + ) + response.raise_for_status() + return ( + response.json()["data"]["createAccessToken"]["metadata"]["id"], + response.json()["data"]["createAccessToken"]["accessToken"], + ) + + def destroy(self): + if self._gms_token_id: + json = { + "query": """mutation revokeAccessToken($tokenId: String!) 
{ + revokeAccessToken(tokenId: $tokenId) + }""", + "variables": {"tokenId": self._gms_token_id}, + } + + response = self._upstream.post( + f"{self._frontend_url}/api/v2/graphql", json=json + ) + response.raise_for_status() diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py index a99f1f0dbb245..cd961b6125057 100644 --- a/smoke-test/tests/views/views_test.py +++ b/smoke-test/tests/views/views_test.py @@ -1,24 +1,18 @@ import pytest import tenacity -from tests.utils import get_frontend_url, get_sleep_info +from tests.utils import get_sleep_info sleep_sec, sleep_times = get_sleep_info() -@pytest.mark.dependency() -def test_healthchecks(wait_for_healthchecks): - # Call to wait_for_healthchecks fixture will do the actual functionality. - pass - - @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_more_views(frontend_session, list_views_json, query_name, before_count): +def _ensure_more_views(auth_session, list_views_json, query_name, before_count): # Get new count of Views - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_views_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_views_json ) response.raise_for_status() res_data = response.json() @@ -38,10 +32,10 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_less_views(frontend_session, list_views_json, query_name, before_count): +def _ensure_less_views(auth_session, list_views_json, query_name, before_count): # Get new count of Views - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_views_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_views_json ) response.raise_for_status() res_data = response.json() @@ -57,8 +51,8 @@ def _ensure_less_views(frontend_session, list_views_json, query_name, before_cou assert after_count == before_count - 1 -@pytest.mark.dependency(depends=["test_healthchecks"]) -def test_create_list_delete_global_view(frontend_session): +@pytest.mark.dependency() +def test_create_list_delete_global_view(auth_session): # Get count of existing views list_global_views_json = { "query": """query listGlobalViews($input: ListGlobalViewsInput!) 
{\n @@ -88,8 +82,8 @@ def test_create_list_delete_global_view(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_global_views_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_global_views_json ) response.raise_for_status() res_data = response.json() @@ -135,8 +129,8 @@ def test_create_list_delete_global_view(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=create_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json ) response.raise_for_status() res_data = response.json() @@ -149,7 +143,7 @@ def test_create_list_delete_global_view(frontend_session): view_urn = res_data["data"]["createView"]["urn"] new_count = _ensure_more_views( - frontend_session=frontend_session, + auth_session=auth_session, list_views_json=list_global_views_json, query_name="listGlobalViews", before_count=before_count, @@ -163,25 +157,23 @@ def test_create_list_delete_global_view(frontend_session): "variables": {"urn": view_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=delete_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json ) response.raise_for_status() res_data = response.json() assert "errors" not in res_data _ensure_less_views( - frontend_session=frontend_session, + auth_session=auth_session, list_views_json=list_global_views_json, query_name="listGlobalViews", before_count=new_count, ) -@pytest.mark.dependency( - depends=["test_healthchecks", "test_create_list_delete_global_view"] -) -def test_create_list_delete_personal_view(frontend_session): +@pytest.mark.dependency(depends=["test_create_list_delete_global_view"]) +def test_create_list_delete_personal_view(auth_session): # Get count of existing views list_my_views_json = { "query": """query listMyViews($input: ListMyViewsInput!) 
{\n @@ -211,8 +203,8 @@ def test_create_list_delete_personal_view(frontend_session): "variables": {"input": {"start": "0", "count": "20"}}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=list_my_views_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=list_my_views_json ) response.raise_for_status() res_data = response.json() @@ -258,8 +250,8 @@ def test_create_list_delete_personal_view(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=create_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json ) response.raise_for_status() res_data = response.json() @@ -272,7 +264,7 @@ def test_create_list_delete_personal_view(frontend_session): view_urn = res_data["data"]["createView"]["urn"] new_count = _ensure_more_views( - frontend_session=frontend_session, + auth_session=auth_session, list_views_json=list_my_views_json, query_name="listMyViews", before_count=before_count, @@ -286,25 +278,23 @@ def test_create_list_delete_personal_view(frontend_session): "variables": {"urn": view_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=delete_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json ) response.raise_for_status() res_data = response.json() assert "errors" not in res_data _ensure_less_views( - frontend_session=frontend_session, + auth_session=auth_session, list_views_json=list_my_views_json, query_name="listMyViews", before_count=new_count, ) -@pytest.mark.dependency( - depends=["test_healthchecks", "test_create_list_delete_personal_view"] -) -def test_update_global_view(frontend_session): +@pytest.mark.dependency(depends=["test_create_list_delete_personal_view"]) +def test_update_global_view(auth_session): # First create a view new_view_name = "Test View" new_view_description = "Test Description" @@ -340,8 +330,8 @@ def test_update_global_view(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=create_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=create_view_json ) response.raise_for_status() res_data = response.json() @@ -386,8 +376,8 @@ def test_update_global_view(frontend_session): }, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=update_view_json ) response.raise_for_status() res_data = response.json() @@ -404,8 +394,8 @@ def test_update_global_view(frontend_session): "variables": {"urn": view_urn}, } - response = frontend_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=delete_view_json + response = auth_session.post( + f"{auth_session.frontend_url()}/api/v2/graphql", json=delete_view_json ) response.raise_for_status() res_data = response.json() From 1a73c664f09a766306a89e38d5a2d50ee5437d95 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Fri, 27 Sep 2024 22:16:22 +0530 Subject: [PATCH 2/8] feat(ingest/databricks): add usage perf report (#11480) --- .../datahub/ingestion/source/unity/report.py | 14 ++- .../datahub/ingestion/source/unity/usage.py | 109 ++++++++++-------- .../performance/databricks/test_unity.py | 7 +- .../databricks/unity_proxy_mock.py | 12 +- 4 files changed, 87 
insertions(+), 55 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py index 02eedb67f4cc2..a00a52ae54207 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/report.py @@ -1,10 +1,19 @@ from dataclasses import dataclass, field from typing import Optional, Tuple -from datahub.ingestion.api.report import EntityFilterReport +from datahub.ingestion.api.report import EntityFilterReport, Report from datahub.ingestion.source.sql.sql_generic_profiler import ProfilingSqlReport from datahub.ingestion.source_report.ingestion_stage import IngestionStageReport from datahub.utilities.lossy_collections import LossyDict, LossyList +from datahub.utilities.perf_timer import PerfTimer + + +@dataclass +class UnityCatalogUsagePerfReport(Report): + get_queries_timer: PerfTimer = field(default_factory=PerfTimer) + sql_parsing_timer: PerfTimer = field(default_factory=PerfTimer) + aggregator_add_event_timer: PerfTimer = field(default_factory=PerfTimer) + gen_operation_timer: PerfTimer = field(default_factory=PerfTimer) @dataclass @@ -27,6 +36,9 @@ class UnityCatalogReport(IngestionStageReport, ProfilingSqlReport): num_queries_missing_table: int = 0 # Can be due to pattern filter num_queries_duplicate_table: int = 0 num_queries_parsed_by_spark_plan: int = 0 + usage_perf_report: UnityCatalogUsagePerfReport = field( + default_factory=UnityCatalogUsagePerfReport + ) # Distinguish from Operations emitted for created / updated timestamps num_operational_stats_workunits_emitted: int = 0 diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py index 5eec2ca587ead..08482c9d2fa3b 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/usage.py @@ -81,20 +81,25 @@ def _get_workunits_internal( table_map[f"{ref.schema}.{ref.table}"].append(ref) table_map[ref.qualified_table_name].append(ref) - for query in self._get_queries(): - self.report.num_queries += 1 - table_info = self._parse_query(query, table_map) - if table_info is not None: - if self.config.include_operational_stats: - yield from self._generate_operation_workunit(query, table_info) - for source_table in table_info.source_tables: - self.usage_aggregator.aggregate_event( - resource=source_table, - start_time=query.start_time, - query=query.query_text, - user=query.user_name, - fields=[], - ) + with self.report.usage_perf_report.get_queries_timer as current_timer: + for query in self._get_queries(): + self.report.num_queries += 1 + with current_timer.pause(): + table_info = self._parse_query(query, table_map) + if table_info is not None: + if self.config.include_operational_stats: + yield from self._generate_operation_workunit( + query, table_info + ) + for source_table in table_info.source_tables: + with self.report.usage_perf_report.aggregator_add_event_timer: + self.usage_aggregator.aggregate_event( + resource=source_table, + start_time=query.start_time, + query=query.query_text, + user=query.user_name, + fields=[], + ) if not self.report.num_queries: logger.warning("No queries found in the given time range.") @@ -117,29 +122,34 @@ def _get_workunits_internal( def _generate_operation_workunit( self, query: Query, table_info: QueryTableInfo ) -> Iterable[MetadataWorkUnit]: - if ( - not query.statement_type - or 
query.statement_type not in OPERATION_STATEMENT_TYPES - ): - return None + with self.report.usage_perf_report.gen_operation_timer: + if ( + not query.statement_type + or query.statement_type not in OPERATION_STATEMENT_TYPES + ): + return None - # Not sure about behavior when there are multiple target tables. This is a best attempt. - for target_table in table_info.target_tables: - operation_aspect = OperationClass( - timestampMillis=int(time.time() * 1000), - lastUpdatedTimestamp=int(query.end_time.timestamp() * 1000), - actor=( - self.user_urn_builder(query.user_name) if query.user_name else None - ), - operationType=OPERATION_STATEMENT_TYPES[query.statement_type], - affectedDatasets=[ - self.table_urn_builder(table) for table in table_info.source_tables - ], - ) - self.report.num_operational_stats_workunits_emitted += 1 - yield MetadataChangeProposalWrapper( - entityUrn=self.table_urn_builder(target_table), aspect=operation_aspect - ).as_workunit() + # Not sure about behavior when there are multiple target tables. This is a best attempt. + for target_table in table_info.target_tables: + operation_aspect = OperationClass( + timestampMillis=int(time.time() * 1000), + lastUpdatedTimestamp=int(query.end_time.timestamp() * 1000), + actor=( + self.user_urn_builder(query.user_name) + if query.user_name + else None + ), + operationType=OPERATION_STATEMENT_TYPES[query.statement_type], + affectedDatasets=[ + self.table_urn_builder(table) + for table in table_info.source_tables + ], + ) + self.report.num_operational_stats_workunits_emitted += 1 + yield MetadataChangeProposalWrapper( + entityUrn=self.table_urn_builder(target_table), + aspect=operation_aspect, + ).as_workunit() def _get_queries(self) -> Iterable[Query]: try: @@ -153,18 +163,23 @@ def _get_queries(self) -> Iterable[Query]: def _parse_query( self, query: Query, table_map: TableMap ) -> Optional[QueryTableInfo]: - table_info = self._parse_query_via_lineage_runner(query.query_text) - if table_info is None and query.statement_type == QueryStatementType.SELECT: - table_info = self._parse_query_via_spark_sql_plan(query.query_text) + with self.report.usage_perf_report.sql_parsing_timer: + table_info = self._parse_query_via_lineage_runner(query.query_text) + if table_info is None and query.statement_type == QueryStatementType.SELECT: + table_info = self._parse_query_via_spark_sql_plan(query.query_text) - if table_info is None: - self.report.num_queries_dropped_parse_failure += 1 - return None - else: - return QueryTableInfo( - source_tables=self._resolve_tables(table_info.source_tables, table_map), - target_tables=self._resolve_tables(table_info.target_tables, table_map), - ) + if table_info is None: + self.report.num_queries_dropped_parse_failure += 1 + return None + else: + return QueryTableInfo( + source_tables=self._resolve_tables( + table_info.source_tables, table_map + ), + target_tables=self._resolve_tables( + table_info.target_tables, table_map + ), + ) def _parse_query_via_lineage_runner(self, query: str) -> Optional[StringTableInfo]: try: diff --git a/metadata-ingestion/tests/performance/databricks/test_unity.py b/metadata-ingestion/tests/performance/databricks/test_unity.py index 6592ffe5198c1..ddd19804ba184 100644 --- a/metadata-ingestion/tests/performance/databricks/test_unity.py +++ b/metadata-ingestion/tests/performance/databricks/test_unity.py @@ -40,7 +40,10 @@ def run_test(): print("Data generated") config = UnityCatalogSourceConfig( - token="", workspace_url="http://localhost:1234", include_usage_statistics=False + 
token="", + workspace_url="http://localhost:1234", + include_usage_statistics=True, + include_hive_metastore=False, ) ctx = PipelineContext(run_id="test") with patch( @@ -61,7 +64,7 @@ def run_test(): print( f"Peak Memory Used: {humanfriendly.format_size(peak_memory_usage - pre_mem_usage)}" ) - print(source.report.aspects) + print(source.report.as_string()) if __name__ == "__main__": diff --git a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py index cb3a1c165acdd..307a7ba71ef83 100644 --- a/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py +++ b/metadata-ingestion/tests/performance/databricks/unity_proxy_mock.py @@ -1,7 +1,7 @@ import uuid from collections import defaultdict from datetime import datetime, timezone -from typing import Dict, Iterable, List +from typing import Dict, Iterable, List, Optional from databricks.sdk.service.catalog import ColumnTypeName from databricks.sdk.service.sql import QueryStatementType @@ -57,13 +57,15 @@ def assigned_metastore(self) -> Metastore: region=None, ) - def catalogs(self, metastore: Metastore) -> Iterable[Catalog]: + def catalogs(self, metastore: Optional[Metastore]) -> Iterable[Catalog]: for container in self.seed_metadata.containers[1]: - if not container.parent or metastore.name != container.parent.name: + if not container.parent or ( + metastore and metastore.name != container.parent.name + ): continue yield Catalog( - id=f"{metastore.id}.{container.name}", + id=f"{metastore.id}.{container.name}" if metastore else container.name, name=container.name, metastore=metastore, comment=None, @@ -153,7 +155,7 @@ def query_history( executed_as_user_name=None, ) - def table_lineage(self, table: Table) -> None: + def table_lineage(self, table: Table, include_entity_lineage: bool) -> None: pass def get_column_lineage(self, table: Table) -> None: From 99bfcefb72af5198d9f9c91afb4716b645945362 Mon Sep 17 00:00:00 2001 From: haeniya Date: Fri, 27 Sep 2024 18:46:39 +0200 Subject: [PATCH 3/8] feat(ingestion/tableau): introduce project_path_pattern (#10855) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Yanik Häni --- .../ingestion/source/tableau/tableau.py | 68 ++-- ...roject_path_pattern_allow_mces_golden.json | 352 ++++++++++++++++++ ...project_path_pattern_deny_mces_golden.json | 184 +++++++++ .../tableau/test_tableau_ingest.py | 67 +++- 4 files changed, 645 insertions(+), 26 deletions(-) create mode 100644 metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json create mode 100644 metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py index 4a1ec14ca1d4e..9f011790990ec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py @@ -313,12 +313,22 @@ class TableauConfig( # Tableau project pattern project_pattern: AllowDenyPattern = Field( default=AllowDenyPattern.allow_all(), - description="Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " + description="[deprecated] Use project_path_pattern instead. 
Filter for specific Tableau projects. For example, use 'My Project' to ingest a root-level Project with name 'My Project', or 'My Project/Nested Project' to ingest a nested Project with name 'Nested Project'. " "By default, all Projects nested inside a matching Project will be included in ingestion. " "You can both allow and deny projects based on their name using their name, or a Regex pattern. " "Deny patterns always take precedence over allow patterns. " "By default, all projects will be ingested.", ) + _deprecate_projects_pattern = pydantic_field_deprecated("project_pattern") + + project_path_pattern: AllowDenyPattern = Field( + default=AllowDenyPattern.allow_all(), + description="Filters Tableau projects by their full path. For instance, 'My Project/Nested Project' targets a specific nested project named 'Nested Project'." + " This is also useful when you need to exclude all nested projects under a particular project." + " You can allow or deny projects by specifying their path or a regular expression pattern." + " Deny patterns always override allow patterns." + " By default, all projects are ingested.", + ) project_path_separator: str = Field( default="/", @@ -454,17 +464,23 @@ class TableauConfig( def projects_backward_compatibility(cls, values: Dict) -> Dict: projects = values.get("projects") project_pattern = values.get("project_pattern") - if project_pattern is None and projects: + project_path_pattern = values.get("project_path_pattern") + if project_pattern is None and project_path_pattern is None and projects: logger.warning( - "project_pattern is not set but projects is set. projects is deprecated, please use " - "project_pattern instead." + "projects is deprecated, please use " "project_path_pattern instead." ) logger.info("Initializing project_pattern from projects") values["project_pattern"] = AllowDenyPattern( allow=[f"^{prj}$" for prj in projects] ) - elif project_pattern != AllowDenyPattern.allow_all() and projects: - raise ValueError("projects is deprecated. Please use project_pattern only.") + elif (project_pattern or project_path_pattern) and projects: + raise ValueError( + "projects is deprecated. Please use project_path_pattern only." + ) + elif project_path_pattern and project_pattern: + raise ValueError( + "project_pattern is deprecated. Please use project_path_pattern only." + ) return values @@ -850,12 +866,13 @@ def form_path(project_id: str) -> List[str]: def _is_allowed_project(self, project: TableauProject) -> bool: # Either project name or project path should exist in allow - is_allowed: bool = self.config.project_pattern.allowed( - project.name - ) or self.config.project_pattern.allowed(self._get_project_path(project)) + is_allowed: bool = ( + self.config.project_pattern.allowed(project.name) + or self.config.project_pattern.allowed(self._get_project_path(project)) + ) and self.config.project_path_pattern.allowed(self._get_project_path(project)) if is_allowed is False: logger.info( - f"project({project.name}) is not allowed as per project_pattern" + f"Project ({project.name}) is not allowed as per project_pattern or project_path_pattern" ) return is_allowed @@ -887,28 +904,29 @@ def _init_tableau_project_registry(self, all_project_map: dict) -> None: logger.debug(f"Project {project.name} is added in project registry") projects_to_ingest[project.id] = project - # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. 
- # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. - self.tableau_project_registry = OrderedDict( - sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) - ) - if self.config.extract_project_hierarchy is False: logger.debug( "Skipping project hierarchy processing as configuration extract_project_hierarchy is " "disabled" ) - return + else: + logger.debug( + "Reevaluating projects as extract_project_hierarchy is enabled" + ) - logger.debug("Reevaluating projects as extract_project_hierarchy is enabled") + for project in list_of_skip_projects: + if ( + project.parent_id in projects_to_ingest + and self._is_denied_project(project) is False + ): + logger.debug(f"Project {project.name} is added in project registry") + projects_to_ingest[project.id] = project - for project in list_of_skip_projects: - if ( - project.parent_id in self.tableau_project_registry - and self._is_denied_project(project) is False - ): - logger.debug(f"Project {project.name} is added in project registry") - self.tableau_project_registry[project.id] = project + # We rely on automatic browse paths (v2) when creating containers. That's why we need to sort the projects here. + # Otherwise, nested projects will not have the correct browse paths if not created in correct order / hierarchy. + self.tableau_project_registry = OrderedDict( + sorted(projects_to_ingest.items(), key=lambda item: len(item[1].path)) + ) def _init_datasource_registry(self) -> None: if self.server is None: diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json new file mode 100644 index 0000000000000..8798ca291422c --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_allow_mces_golden.json @@ -0,0 +1,352 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "190a6a5c-63ed-4de1-8045-faeae5df5b01" + }, + "name": "default" + } + }, + "systemMetadata": { + "lastObserved": 1727349368101, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368102, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368103, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368104, + "runId": "tableau-test", + "lastRunId": 
"no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368105, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "79d02655-88e5-45a6-9f9b-eeaf5fe54903" + }, + "name": "DenyProject" + } + }, + "systemMetadata": { + "lastObserved": 1727349368108, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368109, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368110, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + }, + "systemMetadata": { + "lastObserved": 1727349368111, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368112, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "workbook_id": "ee012e36-d916-4c21-94ab-f0d66736af4e" + }, + "externalUrl": "https://do-not-connect/#/site/acryl/workbooks/17904", + "name": "Deny Pattern WorkBook", + "description": "" + } + }, + "systemMetadata": { + "lastObserved": 
1727349368113, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368114, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368115, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Workbook" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368116, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:jawadqu@gmail.com", + "type": "DATAOWNER" + } + ], + "ownerTypes": {}, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1727349368117, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:595877512935338b94eac9e06cf20607", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b", + "urn": "urn:li:container:5ec314b9630974ec084f5dfd3849f87b" + }, + { + "id": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce", + "urn": "urn:li:container:beaddce9d1e89ab503ae6408fb77d4ce" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368118, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json new file mode 100644 index 0000000000000..96dcfeb246c91 --- /dev/null +++ b/metadata-ingestion/tests/integration/tableau/tableau_project_path_pattern_deny_mces_golden.json @@ -0,0 +1,184 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + 
"customProperties": { + "platform": "tableau", + "project_id": "c30aafe5-44f4-4f28-80d3-d181010a263c" + }, + "name": "Project 2" + } + }, + "systemMetadata": { + "lastObserved": 1727349368232, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368233, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368234, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:252a054d4dd93cd657735aa46dd71370", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368235, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "tableau", + "project_id": "910733aa-2e95-4ac3-a2e8-71570751099d" + }, + "name": "Samples" + } + }, + "systemMetadata": { + "lastObserved": 1727349368238, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { + "platform": "urn:li:dataPlatform:tableau" + } + }, + "systemMetadata": { + "lastObserved": 1727349368239, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Project" + ] + } + }, + "systemMetadata": { + "lastObserved": 1727349368240, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +}, +{ + 
"entityType": "container", + "entityUrn": "urn:li:container:d2dcd6bd1bb954d62f1cfc68332ee873", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1727349368241, + "runId": "tableau-test", + "lastRunId": "no-run-id-provided", + "pipelineName": "tableau-test-pipeline" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py index 4be39f02757ba..5a5552a78c56f 100644 --- a/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py +++ b/metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py @@ -545,7 +545,72 @@ def test_value_error_projects_and_project_pattern( pipeline_config=new_config, ) except Exception as e: - assert "projects is deprecated. Please use project_pattern only" in str(e) + assert "projects is deprecated. Please use project_path_pattern only" in str(e) + + +def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph): + # Ingestion should raise ValueError + output_file_name: str = "tableau_project_pattern_deprecation_mces.json" + golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_pattern"] = {"allow": ["^Samples$"]} + new_config["project_path_pattern"] = {"allow": ["^Samples$"]} + + try: + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + except Exception as e: + assert ( + "project_pattern is deprecated. Please use project_path_pattern only" + in str(e) + ) + + +def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_allow_mces.json" + golden_file_name: str = "tableau_project_path_pattern_allow_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"allow": ["default/DenyProject"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) + + +def test_project_path_pattern_deny(pytestconfig, tmp_path, mock_datahub_graph): + output_file_name: str = "tableau_project_path_pattern_deny_mces.json" + golden_file_name: str = "tableau_project_path_pattern_deny_mces_golden.json" + + new_config = config_source_default.copy() + del new_config["projects"] + new_config["project_path_pattern"] = {"deny": ["^default.*"]} + + tableau_ingest_common( + pytestconfig, + tmp_path, + mock_data(), + golden_file_name, + output_file_name, + mock_datahub_graph, + pipeline_config=new_config, + ) @freeze_time(FROZEN_TIME) From a8e6a06ea4a355c1fe97d7774bb7841d17753238 Mon Sep 17 00:00:00 2001 From: Jonny Dixon <45681293+acrylJonny@users.noreply.github.com> Date: Fri, 27 Sep 2024 18:11:06 +0100 Subject: [PATCH 4/8] docs(ingest/dbt): update run result paths examples (#11138) --- metadata-ingestion/docs/sources/dbt/dbt.md | 8 +++-- .../docs/sources/dbt/dbt_recipe.yml | 3 +- .../tests/integration/dbt/test_dbt.py | 30 +++++++++++++------ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/metadata-ingestion/docs/sources/dbt/dbt.md b/metadata-ingestion/docs/sources/dbt/dbt.md index 52a19777dd033..9f366f579e5fc 100644 --- 
a/metadata-ingestion/docs/sources/dbt/dbt.md +++ b/metadata-ingestion/docs/sources/dbt/dbt.md @@ -175,7 +175,7 @@ To integrate with dbt tests, the `dbt` source needs access to the `run_results.j 1. Run `dbt build` 2. Copy the `target/run_results.json` file to a separate location. This is important, because otherwise subsequent `dbt` commands will overwrite the run results. 3. Run `dbt docs generate` to generate the `manifest.json` and `catalog.json` files -4. The dbt source makes use of the manifest, catalog, and run results file, and hence will need to be moved to a location accessible to the `dbt` source (e.g. s3 or local file system). In the ingestion recipe, the `test_results_path` config must be set to the location of the `run_results.json` file from the `dbt build` or `dbt test` run. +4. The dbt source makes use of the manifest, catalog, and run results file, and hence will need to be moved to a location accessible to the `dbt` source (e.g. s3 or local file system). In the ingestion recipe, the `run_results_paths` config must be set to the location of the `run_results.json` file from the `dbt build` or `dbt test` run. The connector will produce the following things: @@ -219,7 +219,8 @@ source: config: manifest_path: _path_to_manifest_json catalog_path: _path_to_catalog_json - test_results_path: _path_to_run_results_json + run_results_paths: + - _path_to_run_results_json target_platform: postgres entities_enabled: test_results: Only @@ -233,7 +234,8 @@ source: config: manifest_path: _path_to_manifest_json catalog_path: _path_to_catalog_json - run_results_path: _path_to_run_results_json + run_results_paths: + - _path_to_run_results_json target_platform: postgres entities_enabled: test_results: No diff --git a/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml b/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml index 251aba44db387..e6949af4cf6ff 100644 --- a/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml +++ b/metadata-ingestion/docs/sources/dbt/dbt_recipe.yml @@ -6,7 +6,8 @@ source: manifest_path: "${DBT_PROJECT_ROOT}/target/manifest_file.json" catalog_path: "${DBT_PROJECT_ROOT}/target/catalog_file.json" sources_path: "${DBT_PROJECT_ROOT}/target/sources_file.json" # optional for freshness - test_results_path: "${DBT_PROJECT_ROOT}/target/run_results.json" # optional for recording dbt test results after running dbt test + run_results_paths: + - "${DBT_PROJECT_ROOT}/target/run_results.json" # optional for recording dbt test results after running dbt test # Options target_platform: "my_target_platform_id" # e.g. bigquery/postgres/etc. 
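For illustration, a minimal recipe sketch of the new list-valued option described above (the file names here are hypothetical, not taken from this patch): because run_results_paths is a list rather than the old single-valued test_results_path / run_results_path, it can presumably reference several run_results.json snapshots, for example one saved after dbt build and another after dbt test.

source:
  type: dbt
  config:
    manifest_path: "${DBT_PROJECT_ROOT}/target/manifest.json"
    catalog_path: "${DBT_PROJECT_ROOT}/target/catalog.json"
    # run_results_paths replaces the single-valued test_results_path / run_results_path.
    # Each entry points at a saved run_results.json; the two snapshot names below are
    # illustrative assumptions, not files produced by dbt under those exact names.
    run_results_paths:
      - "${DBT_PROJECT_ROOT}/target/run_results_build.json"
      - "${DBT_PROJECT_ROOT}/target/run_results_test.json"
    target_platform: postgres
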
diff --git a/metadata-ingestion/tests/integration/dbt/test_dbt.py b/metadata-ingestion/tests/integration/dbt/test_dbt.py index d60bb425c1ff5..390d8d7698dd4 100644 --- a/metadata-ingestion/tests/integration/dbt/test_dbt.py +++ b/metadata-ingestion/tests/integration/dbt/test_dbt.py @@ -339,9 +339,13 @@ def test_dbt_tests(test_resources_dir, pytestconfig, tmp_path, mock_time, **kwar (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - (test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], ), ), sink=DynamicTypedConfig(type="file", config={"filename": str(output_file)}), @@ -442,9 +446,13 @@ def test_dbt_tests_only_assertions( (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - (test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], entities_enabled=DBTEntitiesEnabled( test_results=EmitDirective.ONLY ), @@ -518,9 +526,13 @@ def test_dbt_only_test_definitions_and_results( (test_resources_dir / "jaffle_shop_catalog.json").resolve() ), target_platform="postgres", - test_results_path=str( - (test_resources_dir / "jaffle_shop_test_results.json").resolve() - ), + run_results_paths=[ + str( + ( + test_resources_dir / "jaffle_shop_test_results.json" + ).resolve() + ) + ], entities_enabled=DBTEntitiesEnabled( sources=EmitDirective.NO, seeds=EmitDirective.NO, From 07034caf09a930ad39fc2a594d1503bdf6c34ed8 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 27 Sep 2024 10:24:22 -0700 Subject: [PATCH 5/8] feat(ingest): support `DATAHUB_INCLUDE_ENV_IN_CONTAINER_PROPERTIES` (#11476) --- .../datahub/configuration/source_common.py | 7 ++---- .../src/datahub/emitter/enum_helpers.py | 11 ++++++++++ .../src/datahub/emitter/mce_builder.py | 19 ++++------------ .../src/datahub/emitter/mcp_builder.py | 22 +++++++++++++++---- metadata-ingestion/tests/unit/test_codegen.py | 8 +++++++ 5 files changed, 43 insertions(+), 24 deletions(-) create mode 100644 metadata-ingestion/src/datahub/emitter/enum_helpers.py diff --git a/metadata-ingestion/src/datahub/configuration/source_common.py b/metadata-ingestion/src/datahub/configuration/source_common.py index 7160aa6fc339d..ad12447532335 100644 --- a/metadata-ingestion/src/datahub/configuration/source_common.py +++ b/metadata-ingestion/src/datahub/configuration/source_common.py @@ -4,14 +4,11 @@ from pydantic.fields import Field from datahub.configuration.common import ConfigModel +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import FabricTypeClass DEFAULT_ENV = FabricTypeClass.PROD - -# Get all the constants from the FabricTypeClass. It's not an enum, so this is a bit hacky but works. 
-ALL_ENV_TYPES: Set[str] = { - value for name, value in vars(FabricTypeClass).items() if not name.startswith("_") -} +ALL_ENV_TYPES: Set[str] = set(get_enum_options(FabricTypeClass)) class PlatformInstanceConfigMixin(ConfigModel): diff --git a/metadata-ingestion/src/datahub/emitter/enum_helpers.py b/metadata-ingestion/src/datahub/emitter/enum_helpers.py new file mode 100644 index 0000000000000..89949ab3717ff --- /dev/null +++ b/metadata-ingestion/src/datahub/emitter/enum_helpers.py @@ -0,0 +1,11 @@ +from typing import List, Type + + +def get_enum_options(_class: Type[object]) -> List[str]: + """Get the valid values for an enum in the datahub.metadata.schema_classes module.""" + + return [ + value + for name, value in vars(_class).items() + if not callable(value) and not name.startswith("_") + ] diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index e273bab62fe7a..d3a930d988171 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -11,7 +11,6 @@ from typing import ( TYPE_CHECKING, Any, - Iterable, List, Optional, Tuple, @@ -25,7 +24,8 @@ import typing_inspect from avrogen.dict_wrapper import DictWrapper -from datahub.configuration.source_common import DEFAULT_ENV as DEFAULT_ENV_CONFIGURATION +from datahub.configuration.source_common import DEFAULT_ENV +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import ( AssertionKeyClass, AuditStampClass, @@ -50,15 +50,12 @@ UpstreamLineageClass, _Aspect as AspectAbstract, ) +from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn from datahub.utilities.urn_encoder import UrnEncoder -from datahub.utilities.urns.data_flow_urn import DataFlowUrn -from datahub.utilities.urns.dataset_urn import DatasetUrn -from datahub.utilities.urns.tag_urn import TagUrn logger = logging.getLogger(__name__) Aspect = TypeVar("Aspect", bound=AspectAbstract) -DEFAULT_ENV = DEFAULT_ENV_CONFIGURATION DEFAULT_FLOW_CLUSTER = "prod" UNKNOWN_USER = "urn:li:corpuser:unknown" DATASET_URN_TO_LOWER: bool = ( @@ -374,19 +371,11 @@ def make_ml_model_group_urn(platform: str, group_name: str, env: str) -> str: ) -def _get_enum_options(_class: Type[object]) -> Iterable[str]: - return [ - f - for f in dir(_class) - if not callable(getattr(_class, f)) and not f.startswith("_") - ] - - def validate_ownership_type(ownership_type: str) -> Tuple[str, Optional[str]]: if ownership_type.startswith("urn:li:"): return OwnershipTypeClass.CUSTOM, ownership_type ownership_type = ownership_type.upper() - if ownership_type in _get_enum_options(OwnershipTypeClass): + if ownership_type in get_enum_options(OwnershipTypeClass): return ownership_type, None raise ValueError(f"Unexpected ownership type: {ownership_type}") diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index cc2a1dc7a7322..b7fb1fd56891c 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -3,6 +3,8 @@ from pydantic.fields import Field from pydantic.main import BaseModel +from datahub.cli.env_utils import get_boolean_env_variable +from datahub.emitter.enum_helpers import get_enum_options from datahub.emitter.mce_builder import ( Aspect, datahub_guid, @@ -34,6 +36,16 @@ TagAssociationClass, ) +# In https://github.com/datahub-project/datahub/pull/11214, we added a +# new env field to container 
properties. However, populating this field +# with servers older than 0.14.1 will cause errors. This environment +# variable is an escape hatch to avoid this compatibility issue. +# TODO: Once the model change has been deployed for a while, we can remove this. +# Probably can do it at the beginning of 2025. +_INCLUDE_ENV_IN_CONTAINER_PROPERTIES = get_boolean_env_variable( + "DATAHUB_INCLUDE_ENV_IN_CONTAINER_PROPERTIES", default=True +) + class DatahubKey(BaseModel): def guid_dict(self) -> Dict[str, str]: @@ -191,16 +203,18 @@ def gen_containers( created: Optional[int] = None, last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: - # because of backwards compatibility with a past issue, container_key.env may be a valid env or an instance name + # Extra validation on the env field. + # In certain cases (mainly for backwards compatibility), the env field will actually + # have a platform instance name. env = ( container_key.env - if container_key.env in vars(FabricTypeClass).values() + if container_key.env in get_enum_options(FabricTypeClass) else None ) + container_urn = container_key.as_urn() yield MetadataChangeProposalWrapper( entityUrn=f"{container_urn}", - # entityKeyAspect=ContainerKeyClass(guid=parent_container_key.guid()), aspect=ContainerProperties( name=name, description=description, @@ -214,7 +228,7 @@ def gen_containers( lastModified=( TimeStamp(time=last_modified) if last_modified is not None else None ), - env=env if env is not None else None, + env=env if _INCLUDE_ENV_IN_CONTAINER_PROPERTIES else None, ), ).as_workunit() diff --git a/metadata-ingestion/tests/unit/test_codegen.py b/metadata-ingestion/tests/unit/test_codegen.py index 0c7d953e194dc..37ac35586950e 100644 --- a/metadata-ingestion/tests/unit/test_codegen.py +++ b/metadata-ingestion/tests/unit/test_codegen.py @@ -6,9 +6,11 @@ import pytest import typing_inspect +from datahub.emitter.enum_helpers import get_enum_options from datahub.metadata.schema_classes import ( ASPECT_CLASSES, KEY_ASPECTS, + FabricTypeClass, FineGrainedLineageClass, MetadataChangeEventClass, OwnershipClass, @@ -157,3 +159,9 @@ def _err(msg: str) -> None: assert ( not errors ), f'To fix these errors, run "UPDATE_ENTITY_REGISTRY=true pytest {__file__}"' + + +def test_enum_options(): + # This is mainly a sanity check to ensure that it doesn't do anything too crazy. 
+ env_options = get_enum_options(FabricTypeClass) + assert "PROD" in env_options From 5cbd83675ee3c785b0bb6b4085ccd2eb16ab2929 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Fri, 27 Sep 2024 13:11:40 -0500 Subject: [PATCH 6/8] refactor(criterion): refactor criterion construction (#11486) --- .../analytics/resolver/GetChartsResolver.java | 13 +- .../graphql/resolvers/ResolverUtils.java | 36 +-- .../resolvers/auth/DebugAccessResolver.java | 26 +- .../container/ContainerEntitiesResolver.java | 6 +- .../DashboardUsageStatsResolver.java | 8 +- .../dashboard/DashboardUsageStatsUtils.java | 29 +- .../dataset/DatasetHealthResolver.java | 11 +- .../domain/DomainEntitiesResolver.java | 6 +- .../GetRootGlossaryNodesResolver.java | 8 +- .../GetRootGlossaryTermsResolver.java | 8 +- .../health/EntityHealthResolver.java | 11 +- ...estionSourceExecutionRequestsResolver.java | 7 +- .../resolvers/jobs/DataJobRunsResolver.java | 15 +- .../resolvers/jobs/EntityRunsResolver.java | 16 +- .../mutate/util/BusinessAttributeUtils.java | 9 +- .../resolvers/mutate/util/DomainUtils.java | 33 +- .../resolvers/mutate/util/FormUtils.java | 8 +- .../ListRecommendationsResolver.java | 3 +- .../graphql/resolvers/ResolverUtilsTest.java | 43 +-- .../browse/BrowseV2ResolverTest.java | 8 +- .../ContainerEntitiesResolverTest.java | 6 +- .../domain/DomainEntitiesResolverTest.java | 7 +- .../GetRootGlossaryNodesResolverTest.java | 8 +- .../GetRootGlossaryTermsResolverTest.java | 8 +- .../AggregateAcrossEntitiesResolverTest.java | 10 +- .../AutoCompleteForMultipleResolverTest.java | 8 +- .../SearchAcrossEntitiesResolverTest.java | 30 +- .../resolvers/search/SearchUtilsTest.java | 255 ++++------------ .../view/CreateViewResolverTest.java | 35 +-- .../view/ListGlobalViewsResolverTest.java | 17 +- .../view/ListMyViewsResolverTest.java | 43 +-- .../view/UpdateViewResolverTest.java | 68 ++--- .../graphql/resolvers/view/ViewUtilsTest.java | 30 +- .../types/view/DataHubViewTypeTest.java | 62 ++-- .../BackfillBrowsePathsV2Step.java | 22 +- .../BackfillPolicyFieldsStep.java | 6 +- docs/how/updating-datahub.md | 5 +- .../graph/dgraph/DgraphGraphService.java | 4 +- .../graph/elastic/ESGraphQueryDAO.java | 4 +- .../elastic/ElasticSearchGraphService.java | 8 +- .../graph/neo4j/Neo4jGraphService.java | 7 +- .../metadata/search/LineageSearchService.java | 5 +- .../elasticsearch/query/ESBrowseDAO.java | 12 +- .../query/filter/BaseQueryFilterRewriter.java | 5 +- .../request/AggregationQueryBuilder.java | 2 - .../metadata/search/utils/ESUtils.java | 124 +------- .../metadata/search/utils/SearchUtils.java | 5 +- .../PropertyDefinitionDeleteSideEffect.java | 11 +- .../ElasticSearchTimeseriesAspectService.java | 35 ++- .../timeseries/elastic/UsageServiceUtil.java | 20 +- ...ySearchAggregationCandidateSourceTest.java | 15 +- .../search/LineageServiceTestBase.java | 38 +-- .../search/SearchServiceTestBase.java | 42 +-- .../search/fixtures/GoldenTestBase.java | 9 +- .../fixtures/SampleDataFixtureTestBase.java | 17 +- .../search/query/SearchDAOTestBase.java | 63 +--- .../ContainerExpansionRewriterTest.java | 22 +- .../filter/DomainExpansionRewriterTest.java | 22 +- .../request/SearchRequestHandlerTest.java | 110 +------ .../metadata/search/utils/ESUtilsTest.java | 157 +++++----- ...ropertyDefinitionDeleteSideEffectTest.java | 9 +- .../TimeseriesAspectServiceTestBase.java | 287 ++++++++---------- .../hook/siblings/SiblingAssociationHook.java | 8 +- .../invite/InviteTokenService.java | 11 +- 
.../boot/steps/BackfillBrowsePathsV2Step.java | 13 +- .../gms/factory/search/CacheTest.java | 18 +- .../entity/client/RestliEntityClient.java | 11 +- .../resources/entity/EntityResource.java | 12 +- .../operations/OperationsResource.java | 13 +- .../metadata/entity/DeleteEntityService.java | 20 +- .../EntitySearchAggregationSource.java | 9 +- .../metadata/search/utils/QueryUtils.java | 85 ++---- .../metadata/service/ViewServiceTest.java | 50 ++- .../metadata/utils/CriterionUtils.java | 71 +++++ .../linkedin/metadata/utils/SearchUtil.java | 22 +- 75 files changed, 789 insertions(+), 1511 deletions(-) create mode 100644 metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java index 4847aea224ccd..0fe6e5de0cac6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/analytics/resolver/GetChartsResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.CORP_USER_ENTITY_NAME; import static com.linkedin.metadata.Constants.CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -30,7 +31,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -153,12 +153,11 @@ private SearchResult searchForNewUsers(@Nonnull final OperationContext opContext .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) - .setCondition(Condition.GREATER_THAN) - .setValue( - String.valueOf( - trailingMonthDateRange.getStart())))))))), + buildCriterion( + CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME, + Condition.GREATER_THAN, + String.valueOf( + trailingMonthDateRange.getStart())))))))), Collections.singletonList( new SortCriterion() .setField(CORP_USER_STATUS_LAST_MODIFIED_FIELD_NAME) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java index 5f873b4bebab3..b1cd0e9165129 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ResolverUtils.java @@ -2,12 +2,12 @@ import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.datahub.authentication.Authentication; import com.fasterxml.jackson.core.StreamReadConstraints; import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import 
com.linkedin.datahub.graphql.exception.ValidationException; import com.linkedin.datahub.graphql.generated.AndFilterInput; @@ -154,38 +154,22 @@ public static Filter buildFilter( // Translates a FacetFilterInput (graphql input class) into Criterion (our internal model) public static Criterion criterionFromFilter(final FacetFilterInput filter) { - Criterion result = new Criterion(); - result.setField(filter.getField()); - - // `value` is deprecated in place of `values`- this is to support old query patterns. If values - // is provided, - // this statement will be skipped - if (filter.getValues() == null && filter.getValue() != null) { - result.setValues(new StringArray(filter.getValue())); - result.setValue(filter.getValue()); - } else if (filter.getValues() != null) { - result.setValues(new StringArray(filter.getValues())); - if (!filter.getValues().isEmpty()) { - result.setValue(filter.getValues().get(0)); - } else { - result.setValue(""); - } - } else { - result.setValues(new StringArray()); - result.setValue(""); - } + final Condition condition; if (filter.getCondition() != null) { - result.setCondition(Condition.valueOf(filter.getCondition().toString())); + condition = Condition.valueOf(filter.getCondition().toString()); } else { - result.setCondition(Condition.EQUAL); + condition = Condition.EQUAL; } - if (filter.getNegated() != null) { - result.setNegated(filter.getNegated()); + final List values; + if (filter.getValues() == null && filter.getValue() != null) { + values = Collections.singletonList(filter.getValue()); + } else { + values = filter.getValues(); } - return result; + return buildCriterion(filter.getField(), condition, filter.getNegated(), values); } public static Filter viewFilter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java index 8372b6b5126a3..4331fe1193fb4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/auth/DebugAccessResolver.java @@ -1,12 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.auth; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableSet; import com.linkedin.common.EntityRelationship; import com.linkedin.common.EntityRelationships; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.authorization.AuthorizationUtils; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -19,7 +19,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; @@ -199,41 +198,28 @@ private Filter buildFilterToGetPolicies( ConjunctiveCriterionArray conjunctiveCriteria = new ConjunctiveCriterionArray(); final CriterionArray allUsersAndArray = new CriterionArray(); - allUsersAndArray.add( - new 
Criterion().setField("allUsers").setValue("true").setCondition(Condition.EQUAL)); + allUsersAndArray.add(buildCriterion("allUsers", Condition.EQUAL, "true")); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(allUsersAndArray)); final CriterionArray allGroupsAndArray = new CriterionArray(); - allGroupsAndArray.add( - new Criterion().setField("allGroups").setValue("true").setCondition(Condition.EQUAL)); + allGroupsAndArray.add(buildCriterion("allGroups", Condition.EQUAL, "true")); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(allGroupsAndArray)); if (user != null && !user.isEmpty()) { final CriterionArray userAndArray = new CriterionArray(); - userAndArray.add( - new Criterion().setField("users").setValue(user).setCondition(Condition.EQUAL)); + userAndArray.add(buildCriterion("users", Condition.EQUAL, user)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(userAndArray)); } if (groups != null && !groups.isEmpty()) { final CriterionArray groupsAndArray = new CriterionArray(); - groupsAndArray.add( - new Criterion() - .setField("groups") - .setValue("") - .setValues(new StringArray(groups)) - .setCondition(Condition.EQUAL)); + groupsAndArray.add(buildCriterion("groups", Condition.EQUAL, groups)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(groupsAndArray)); } if (roles != null && !roles.isEmpty()) { final CriterionArray rolesAndArray = new CriterionArray(); - rolesAndArray.add( - new Criterion() - .setField("roles") - .setValue("") - .setValues(new StringArray(roles)) - .setCondition(Condition.EQUAL)); + rolesAndArray.add(buildCriterion("roles", Condition.EQUAL, roles)); conjunctiveCriteria.add(new ConjunctiveCriterion().setAnd(rolesAndArray)); } return new Filter().setOr(conjunctiveCriteria); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java index 5a3207633c07c..82a476ec56ddc 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.container; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.datahub.graphql.QueryContext; @@ -74,10 +75,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro try { final Criterion filterCriterion = - new Criterion() - .setField(CONTAINER_FIELD_NAME + ".keyword") - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(CONTAINER_FIELD_NAME + ".keyword", Condition.EQUAL, urn); return UrnSearchResultsMapper.map( context, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java index 7e4a9c8a80388..70e62de20b567 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsResolver.java @@ -1,6 +1,7 @@ package 
com.linkedin.datahub.graphql.resolvers.dashboard; import static com.linkedin.datahub.graphql.resolvers.dashboard.DashboardUsageStatsUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -14,7 +15,6 @@ import com.linkedin.datahub.graphql.types.dashboard.mappers.DashboardUsageMetricMapper; import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.EnvelopedAspect; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -102,11 +102,7 @@ private List getDashboardUsageMetrics( final ArrayList criteria = new ArrayList<>(); // Add filter for absence of eventGranularity - only consider absolute stats - Criterion excludeTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - .setCondition(Condition.IS_NULL) - .setValue(""); + Criterion excludeTimeBucketsCriterion = buildIsNullCriterion(ES_FIELD_EVENT_GRANULARITY); criteria.add(excludeTimeBucketsCriterion); filter.setOr( new ConjunctiveCriterionArray( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java index 93c08d37c2e36..9c0b1f450b831 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dashboard/DashboardUsageStatsUtils.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.dashboard; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; @@ -319,27 +322,22 @@ public static Filter createUsageFilter( final ArrayList criteria = new ArrayList<>(); // Add filter for urn == dashboardUrn - Criterion dashboardUrnCriterion = - new Criterion().setField(ES_FIELD_URN).setCondition(Condition.EQUAL).setValue(dashboardUrn); + Criterion dashboardUrnCriterion = buildCriterion(ES_FIELD_URN, Condition.EQUAL, dashboardUrn); criteria.add(dashboardUrnCriterion); if (startTime != null) { // Add filter for start time Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(Long.toString(startTime)); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, Long.toString(startTime)); criteria.add(startTimeCriterion); } if (endTime != null) { // Add filter for end time Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(Long.toString(endTime)); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.LESS_THAN_OR_EQUAL_TO, Long.toString(endTime)); criteria.add(endTimeCriterion); } @@ -348,18 +346,11 @@ public static Filter createUsageFilter( // stats // since unit is mandatory, we assume if eventGranularity contains unit, then it is not null Criterion onlyTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - 
.setCondition(Condition.CONTAIN) - .setValue("unit"); + buildCriterion(ES_FIELD_EVENT_GRANULARITY, Condition.CONTAIN, "unit"); criteria.add(onlyTimeBucketsCriterion); } else { // Add filter for absence of eventGranularity - only consider absolute stats - Criterion excludeTimeBucketsCriterion = - new Criterion() - .setField(ES_FIELD_EVENT_GRANULARITY) - .setCondition(Condition.IS_NULL) - .setValue(""); + Criterion excludeTimeBucketsCriterion = buildIsNullCriterion(ES_FIELD_EVENT_GRANULARITY); criteria.add(excludeTimeBucketsCriterion); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java index f38cf80f36ceb..6db581504f93c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/dataset/DatasetHealthResolver.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.dataset; +import static com.linkedin.metadata.Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; import com.google.common.collect.ImmutableList; @@ -197,16 +200,12 @@ private Filter createAssertionsFilter(final String datasetUrn) { final ArrayList criteria = new ArrayList<>(); // Add filter for asserteeUrn == datasetUrn - Criterion datasetUrnCriterion = - new Criterion().setField("asserteeUrn").setCondition(Condition.EQUAL).setValue(datasetUrn); + Criterion datasetUrnCriterion = buildCriterion("asserteeUrn", Condition.EQUAL, datasetUrn); criteria.add(datasetUrnCriterion); // Add filter for result == result Criterion startTimeCriterion = - new Criterion() - .setField("status") - .setCondition(Condition.EQUAL) - .setValue(Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE); + buildCriterion("status", Condition.EQUAL, ASSERTION_RUN_EVENT_STATUS_COMPLETE); criteria.add(startTimeCriterion); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java index c6265380fb2fd..c27fa1d195a76 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolver.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -69,10 +70,7 @@ public CompletableFuture get(final DataFetchingEnvironment enviro final CriterionArray criteria = new CriterionArray(); final Criterion filterCriterion = - new Criterion() - .setField(DOMAINS_FIELD_NAME + ".keyword") - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(DOMAINS_FIELD_NAME + ".keyword", Condition.EQUAL, urn); criteria.add(filterCriterion); if (input.getFilters() != null) { input diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java index 451abfdaf1c06..8ec0ecae684a1 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -15,7 +16,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -84,11 +84,7 @@ public CompletableFuture get( private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java index 7ca79b168819e..9f6808775fff9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolver.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.glossary; import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; @@ -15,7 +16,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -84,11 +84,7 @@ public CompletableFuture get( private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java index 380b7c920ab2f..0a8e0e42a0914 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/health/EntityHealthResolver.java @@ -1,5 +1,8 @@ package com.linkedin.datahub.graphql.resolvers.health; +import static com.linkedin.metadata.Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.EntityRelationships; import com.linkedin.data.template.StringArray; @@ -251,16 +254,12 @@ private Filter createAssertionsFilter(final String datasetUrn) { final ArrayList criteria = new ArrayList<>(); // Add filter for asserteeUrn == datasetUrn - Criterion datasetUrnCriterion = - new Criterion().setField("asserteeUrn").setCondition(Condition.EQUAL).setValue(datasetUrn); + Criterion datasetUrnCriterion = buildCriterion("asserteeUrn", Condition.EQUAL, datasetUrn); criteria.add(datasetUrnCriterion); // Add filter for status == COMPLETE (only consider completed assertion runs) Criterion startTimeCriterion = - new Criterion() - .setField("status") - .setCondition(Condition.EQUAL) - .setValue(Constants.ASSERTION_RUN_EVENT_STATUS_COMPLETE); + buildCriterion("status", Condition.EQUAL, ASSERTION_RUN_EVENT_STATUS_COMPLETE); criteria.add(startTimeCriterion); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java index a4c2ab42227d9..8110fe93f8ab7 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/IngestionSourceExecutionRequestsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.ingest.execution; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -62,10 +64,7 @@ public CompletableFuture get( // 1.
Fetch the related edges final Criterion filterCriterion = - new Criterion() - .setField(INGESTION_SOURCE_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(urn); + buildCriterion(INGESTION_SOURCE_FIELD_NAME, Condition.EQUAL, urn); final SearchResult executionsSearchResult = _entityClient.filter( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java index d7c76c0235dcc..dd470e766378b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/DataJobRunsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.jobs; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; @@ -12,7 +14,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -116,14 +117,10 @@ private Filter buildTaskRunsEntityFilter(final String entityUrn) { CriterionArray array = new CriterionArray( ImmutableList.of( - new Criterion() - .setField(PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(entityUrn), - new Criterion() - .setField(HAS_RUN_EVENTS_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(Boolean.TRUE.toString()))); + buildCriterion( + PARENT_TEMPLATE_URN_SEARCH_INDEX_FIELD_NAME, Condition.EQUAL, entityUrn), + buildCriterion( + HAS_RUN_EVENTS_FIELD_NAME, Condition.EQUAL, Boolean.TRUE.toString()))); final Filter filter = new Filter(); filter.setOr( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java index 82c5b73d87152..d71b6f5d01f4f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/jobs/EntityRunsResolver.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.jobs; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; @@ -15,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -121,13 +122,12 @@ private Filter buildTaskRunsEntityFilter( CriterionArray array = new CriterionArray( ImmutableList.of( - new Criterion() - .setField( - direction.equals(RelationshipDirection.INCOMING) - ? 
INPUT_FIELD_NAME - : OUTPUT_FIELD_NAME) - .setCondition(Condition.EQUAL) - .setValue(entityUrn))); + buildCriterion( + direction.equals(RelationshipDirection.INCOMING) + ? INPUT_FIELD_NAME + : OUTPUT_FIELD_NAME, + Condition.EQUAL, + entityUrn))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java index 25dc36f74ef73..de5176ca440a6 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/BusinessAttributeUtils.java @@ -1,12 +1,13 @@ package com.linkedin.datahub.graphql.resolvers.mutate.util; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchResult; @@ -60,11 +61,7 @@ private static Filter buildNameFilter(String name) { } private static CriterionArray buildNameCriterion(@Nonnull final String name) { - return new CriterionArray( - new Criterion() - .setField(NAME_INDEX_FIELD_NAME) - .setValue(name) - .setCondition(Condition.EQUAL)); + return new CriterionArray(buildCriterion(NAME_INDEX_FIELD_NAME, Condition.EQUAL, name)); } public static SchemaFieldDataType mapSchemaFieldDataType( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java index 1dcdd988f5e7c..4224f75773200 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java @@ -2,6 +2,8 @@ import static com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils.*; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import com.datahub.authorization.ConjunctivePrivilegeGroup; import com.datahub.authorization.DisjunctivePrivilegeGroup; @@ -118,16 +120,9 @@ public static void validateDomain( private static List buildRootDomainCriteria() { final List criteria = new ArrayList<>(); - criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("false") - .setCondition(Condition.EQUAL)); - criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("") - .setCondition(Condition.IS_NULL)); + criteria.add(buildCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, "false")); + + criteria.add(buildIsNullCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME)); return criteria; } @@ 
-135,25 +130,17 @@ private static List buildRootDomainCriteria() { private static List buildParentDomainCriteria(@Nonnull final Urn parentDomainUrn) { final List criteria = new ArrayList<>(); + criteria.add(buildCriterion(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, "true")); + criteria.add( - new Criterion() - .setField(HAS_PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue("true") - .setCondition(Condition.EQUAL)); - criteria.add( - new Criterion() - .setField(PARENT_DOMAIN_INDEX_FIELD_NAME) - .setValue(parentDomainUrn.toString()) - .setCondition(Condition.EQUAL)); + buildCriterion( + PARENT_DOMAIN_INDEX_FIELD_NAME, Condition.EQUAL, parentDomainUrn.toString())); return criteria; } private static Criterion buildNameCriterion(@Nonnull final String name) { - return new Criterion() - .setField(NAME_INDEX_FIELD_NAME) - .setValue(name) - .setCondition(Condition.EQUAL); + return buildCriterion(NAME_INDEX_FIELD_NAME, Condition.EQUAL, name); } /** diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java index cac0cca2682e8..537562a3b7d98 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/FormUtils.java @@ -1,5 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.mutate.util; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; @@ -95,11 +97,7 @@ private static Criterion buildFormCriterion( private static Criterion buildFormCriterion( @Nonnull final String formUrn, @Nonnull final String field, final boolean negated) { - return new Criterion() - .setField(field) - .setValue(formUrn) - .setCondition(Condition.EQUAL) - .setNegated(negated); + return buildCriterion(field, Condition.EQUAL, negated, formUrn); } private static boolean isActorExplicitlyAssigned( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java index 28334b2c0af9a..77f6eb285ecc5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/recommendation/ListRecommendationsResolver.java @@ -2,7 +2,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; -import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; @@ -179,7 +178,7 @@ private RecommendationParams mapRecommendationParams( criterion -> FacetFilter.builder() .setField(criterion.getField()) - .setValues(ImmutableList.of(criterion.getValue())) + .setValues(criterion.getValues()) .build()) .collect(Collectors.toList())); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java index 2950b50bec8f1..4f7119969b538 100644 --- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ResolverUtilsTest.java @@ -2,13 +2,13 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.*; import static com.linkedin.metadata.search.utils.QueryUtils.buildFilterWithUrns; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import static org.testng.AssertJUnit.assertEquals; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.TestUtils; import com.linkedin.datahub.graphql.generated.FacetFilterInput; @@ -47,38 +47,21 @@ public void testCriterionFromFilter() throws Exception { FilterOperator.EQUAL)); assertEquals( valuesCriterion, - new Criterion() - .setValue("urn:li:tag:abc") - .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("tags")); + buildCriterion( + "tags", Condition.EQUAL, ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))); // this is the legacy pathway Criterion valueCriterion = criterionFromFilter( new FacetFilterInput("tags", "urn:li:tag:abc", null, true, FilterOperator.EQUAL)); - assertEquals( - valueCriterion, - new Criterion() - .setValue("urn:li:tag:abc") - .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc"))) - .setNegated(true) - .setCondition(Condition.EQUAL) - .setField("tags")); + assertEquals(valueCriterion, buildCriterion("tags", Condition.EQUAL, true, "urn:li:tag:abc")); // check that both being null doesn't cause a NPE. 
this should never happen except via API // interaction Criterion doubleNullCriterion = criterionFromFilter(new FacetFilterInput("tags", null, null, true, FilterOperator.EQUAL)); assertEquals( - doubleNullCriterion, - new Criterion() - .setValue("") - .setValues(new StringArray(ImmutableList.of())) - .setNegated(true) - .setCondition(Condition.EQUAL) - .setField("tags")); + doubleNullCriterion, buildCriterion("tags", Condition.EQUAL, true, ImmutableList.of())); } @Test @@ -89,11 +72,8 @@ public void testBuildFilterWithUrns() throws Exception { urns.add(urn1); urns.add(urn2); - Criterion ownersCriterion = - new Criterion() - .setField("owners") - .setValues(new StringArray("urn:li:corpuser:chris")) - .setCondition(Condition.EQUAL); + Criterion ownersCriterion = buildCriterion("owners", Condition.EQUAL, "urn:li:corpuser:chris"); + CriterionArray andCriterionArray = new CriterionArray(ImmutableList.of(ownersCriterion)); final Filter filter = new Filter(); filter.setOr( @@ -114,11 +94,10 @@ public void testBuildFilterWithUrns() throws Exception { Filter finalFilter = buildFilterWithUrns(appConfig, urns, filter); Criterion urnsCriterion = - new Criterion() - .setField("urn") - .setValue("") - .setValues( - new StringArray(urns.stream().map(Object::toString).collect(Collectors.toList()))); + buildCriterion( + "urn", + Condition.EQUAL, + urns.stream().map(Object::toString).collect(Collectors.toList())); for (ConjunctiveCriterion conjunctiveCriterion : finalFilter.getOr()) { assertEquals(conjunctiveCriterion.getAnd().contains(ownersCriterion), true); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java index 4897d0819b59f..70c606dc63a90 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/browse/BrowseV2ResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.browse; import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -21,9 +22,9 @@ import com.linkedin.metadata.browse.BrowseResultGroupV2Array; import com.linkedin.metadata.browse.BrowseResultMetadata; import com.linkedin.metadata.browse.BrowseResultV2; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.FormService; @@ -282,10 +283,7 @@ private static DataHubViewInfo createViewInfo(StringArray entityNames) { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("test", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java index 48732727762ee..5af236d7e81e5 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/container/ContainerEntitiesResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.container; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -43,10 +44,7 @@ public void testGetSuccess() throws Exception { final String containerUrn = "urn:li:container:test-container"; final Criterion filterCriterion = - new Criterion() - .setField("container.keyword") - .setCondition(Condition.EQUAL) - .setValue(containerUrn); + buildCriterion("container.keyword", Condition.EQUAL, containerUrn); Mockito.when( mockClient.searchAcrossEntities( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java index ad5d7f1ef6b06..5be65703846a9 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/DomainEntitiesResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.domain; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -45,11 +46,7 @@ public void testGetSuccess() throws Exception { final String childUrn = "urn:li:dataset:(test,test,test)"; final String domainUrn = "urn:li:domain:test-domain"; - final Criterion filterCriterion = - new Criterion() - .setField("domains.keyword") - .setCondition(Condition.EQUAL) - .setValue(domainUrn); + final Criterion filterCriterion = buildCriterion("domains.keyword", Condition.EQUAL, domainUrn); Mockito.when( mockClient.searchAcrossEntities( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java index 60787fc47c88a..cb9df747572c4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryNodesResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -16,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import 
com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -76,11 +76,7 @@ public void testGetSuccess() throws Exception { private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java index 51760ff9d37f2..29af303f7db2d 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/glossary/GetRootGlossaryTermsResolverTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.glossary; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -16,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -76,11 +76,7 @@ public void testGetSuccess() throws Exception { private Filter buildGlossaryEntitiesFilter() { CriterionArray array = new CriterionArray( - ImmutableList.of( - new Criterion() - .setField("hasParentNode") - .setValue("false") - .setCondition(Condition.EQUAL))); + ImmutableList.of(buildCriterion("hasParentNode", Condition.EQUAL, "false"))); final Filter filter = new Filter(); filter.setOr( new ConjunctiveCriterionArray(ImmutableList.of(new ConjunctiveCriterion().setAnd(array)))); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java index 129866bb0fa07..1b33118bd154a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AggregateAcrossEntitiesResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -21,7 +22,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import 
com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntityArray; @@ -348,13 +348,7 @@ private static Filter createFilter(String field, String value) { new ConjunctiveCriterion() .setAnd( new CriterionArray( - ImmutableList.of( - new Criterion() - .setField(field) - .setValue(value) - .setCondition(Condition.EQUAL) - .setNegated(false) - .setValues(new StringArray(ImmutableList.of(value)))))))); + ImmutableList.of(buildCriterion(field, Condition.EQUAL, value)))))); } private static DataHubViewInfo getViewInfo(Filter viewFilter) { diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java index 45190bbfc93e5..17ed6ef5632a1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/AutoCompleteForMultipleResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.search; import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -20,9 +21,9 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.query.AutoCompleteEntityArray; import com.linkedin.metadata.query.AutoCompleteResult; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -264,10 +265,7 @@ private static DataHubViewInfo createViewInfo(StringArray entityNames) { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java index 86508f1fd2742..a601a815453b2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchAcrossEntitiesResolverTest.java @@ -2,6 +2,7 @@ import static com.linkedin.datahub.graphql.TestUtils.*; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import com.google.common.collect.ImmutableList; @@ -21,7 +22,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import 
com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntityArray; @@ -57,10 +57,7 @@ public static void testApplyViewNullBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -135,10 +132,7 @@ public static void testApplyViewBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -163,13 +157,7 @@ public static void testApplyViewBaseFilter() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("baseField") - .setValue("baseTest") - .setCondition(Condition.EQUAL) - .setNegated(false) - .setValues( - new StringArray(ImmutableList.of("baseTest")))))))); + buildCriterion("baseField", Condition.EQUAL, "baseTest")))))); EntityClient mockClient = initMockEntityClient( @@ -236,10 +224,7 @@ public static void testApplyViewNullBaseEntityTypes() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); @@ -306,10 +291,7 @@ public static void testApplyViewEmptyBaseEntityTypes() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); DataHubViewInfo info = new DataHubViewInfo(); info.setName("test"); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java index 8f23f0a624576..832aa97d9216a 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtilsTest.java @@ -1,11 +1,12 @@ package com.linkedin.datahub.graphql.resolvers.search; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import java.util.List; @@ -25,10 +26,7 @@ public static void testApplyViewToFilterNullBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new 
StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); Filter result = SearchUtils.combineFilters(null, viewFilter); Assert.assertEquals(viewFilter, result); @@ -45,28 +43,14 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues( - new StringArray(ImmutableList.of("test2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues( - new StringArray(ImmutableList.of("test4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"))))))); Filter viewFilter = new Filter() @@ -76,10 +60,7 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setValue("test") - .setValues(new StringArray(ImmutableList.of("test")))))))); + buildCriterion("field", Condition.EQUAL, "test")))))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ -92,36 +73,16 @@ public static void testApplyViewToFilterComplexBaseFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("field") - .setValue("test") - .setValues( - new StringArray(ImmutableList.of("test")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("field", Condition.EQUAL, "test")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("field") - .setValue("test") - .setValues( - new StringArray(ImmutableList.of("test"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("field", Condition.EQUAL, "test"))))))); Assert.assertEquals(expectedResult, result); } @@ -137,28 +98,14 @@ public static void testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues( - new StringArray(ImmutableList.of("test2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - 
.setValue("test4") - .setValues( - new StringArray(ImmutableList.of("test4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"))))))); Filter viewFilter = new Filter() @@ -169,31 +116,16 @@ public static void testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray( - ImmutableList.of("viewTest4"))))))))); + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4"))))))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ -206,91 +138,38 @@ public static void testApplyViewToFilterComplexViewFilter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray(ImmutableList.of("viewTest4")))))), + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new 
StringArray(ImmutableList.of("viewTest2")))))), + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field3") - .setValue("test3") - .setValues(new StringArray(ImmutableList.of("test3"))), - new Criterion() - .setField("field4") - .setValue("test4") - .setValues(new StringArray(ImmutableList.of("test4"))), - new Criterion() - .setField("viewField3") - .setValue("viewTest3") - .setValues( - new StringArray(ImmutableList.of("viewTest3"))), - new Criterion() - .setField("viewField4") - .setValue("viewTest4") - .setValues( - new StringArray( - ImmutableList.of("viewTest4"))))))))); + buildCriterion("field3", Condition.EQUAL, "test3"), + buildCriterion("field4", Condition.EQUAL, "test4"), + buildCriterion("viewField3", Condition.EQUAL, "viewTest3"), + buildCriterion( + "viewField4", Condition.EQUAL, "viewTest4"))))))); Assert.assertEquals(expectedResult, result); } @@ -302,28 +181,16 @@ public static void testApplyViewToFilterV1Filter() { .setCriteria( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2")))))); + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2")))); Filter viewFilter = new Filter() .setCriteria( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues(new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues(new StringArray(ImmutableList.of("viewTest2")))))); + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion("viewField2", Condition.EQUAL, "viewTest2")))); Filter result = SearchUtils.combineFilters(baseFilter, viewFilter); @@ -336,25 +203,11 @@ public static void testApplyViewToFilterV1Filter() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field1") - .setValue("test1") - .setValues(new StringArray(ImmutableList.of("test1"))), - new Criterion() - .setField("field2") - .setValue("test2") - .setValues(new StringArray(ImmutableList.of("test2"))), - new Criterion() - .setField("viewField1") - .setValue("viewTest1") - .setValues( - new StringArray(ImmutableList.of("viewTest1"))), - new Criterion() - .setField("viewField2") - .setValue("viewTest2") - .setValues( - new StringArray( - ImmutableList.of("viewTest2"))))))))); + buildCriterion("field1", Condition.EQUAL, "test1"), + buildCriterion("field2", Condition.EQUAL, "test2"), + buildCriterion("viewField1", Condition.EQUAL, "viewTest1"), + buildCriterion( + "viewField2", Condition.EQUAL, "viewTest2"))))))); Assert.assertEquals(expectedResult, result); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java index c009cf37c5397..8f638e4ff9257 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/CreateViewResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.testng.Assert.*; @@ -24,7 +25,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -108,28 +108,17 @@ public void testGetSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java index b5c0531db792b..8fa3b098f2041 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListGlobalViewsResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -8,7 +9,6 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.DataHubViewType; import com.linkedin.datahub.graphql.generated.ListGlobalViewsInput; @@ -18,7 +18,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -55,16 +54,10 @@ public void testGetSuccessInput() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("type") - .setValue(DataHubViewType.GLOBAL.toString()) - .setValues( - new StringArray( - ImmutableList.of( - DataHubViewType.GLOBAL - .toString()))) - 
.setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "type", + Condition.EQUAL, + DataHubViewType.GLOBAL.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java index 85d24f9251eaa..278aeef7b8da4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ListMyViewsResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -8,7 +9,6 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.DataHubViewType; import com.linkedin.datahub.graphql.generated.ListMyViewsInput; @@ -17,7 +17,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.SearchEntity; @@ -57,25 +56,14 @@ public void testGetSuccessInput1() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("createdBy") - .setValue(TEST_USER.toString()) - .setValues( - new StringArray( - ImmutableList.of( - TEST_USER.toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false), - new Criterion() - .setField("type") - .setValue(DataHubViewType.GLOBAL.toString()) - .setValues( - new StringArray( - ImmutableList.of( - DataHubViewType.GLOBAL - .toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "createdBy", + Condition.EQUAL, + TEST_USER.toString()), + buildCriterion( + "type", + Condition.EQUAL, + DataHubViewType.GLOBAL.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) @@ -123,15 +111,10 @@ public void testGetSuccessInput2() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("createdBy") - .setValue(TEST_USER.toString()) - .setValues( - new StringArray( - ImmutableList.of( - TEST_USER.toString()))) - .setCondition(Condition.EQUAL) - .setNegated(false)))))))), + buildCriterion( + "createdBy", + Condition.EQUAL, + TEST_USER.toString())))))))), Mockito.any(), Mockito.eq(0), Mockito.eq(20))) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java index 86a502b40b936..45c2c31e3159b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/UpdateViewResolverTest.java @@ -1,6 +1,7 @@ package 
com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.testng.Assert.*; @@ -30,7 +31,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -109,28 +109,17 @@ public void testGetSuccessGlobalViewIsCreator() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } @@ -173,28 +162,17 @@ public void testGetSuccessGlobalViewManageGlobalViews() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setCondition(Condition.EQUAL) - .setField("test1") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(false), - new Criterion() - .setCondition(Condition.IN) - .setField("test2") - .setValue( - "value1") // Unfortunate --- For - // backwards compat. 
- .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setNegated(true))))))))), + buildCriterion( + "test1", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.IN, + true, + "value1", + "value2"))))))))), Mockito.anyLong()); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java index d142be1321a5c..c33aa629138f1 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/view/ViewUtilsTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.view; import static com.linkedin.datahub.graphql.TestUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -22,7 +23,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.service.ViewService; @@ -148,26 +148,14 @@ public void testMapDefinition() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setNegated(false) - .setValues( - new StringArray( - ImmutableList.of("value1", "value2"))) - .setValue("value1") // Disgraceful - .setField("test1") // Consider whether we - // should NOT go through - // the keyword mapping. - .setCondition(Condition.IN), - new Criterion() - .setNegated(true) - .setValues( - new StringArray( - ImmutableList.of("value3", "value4"))) - .setValue("value3") // Disgraceful - .setField("test2") // Consider whether we - // should NOT go through - // the keyword mapping. 
- .setCondition(Condition.CONTAIN)))))))); + buildCriterion( + "test1", Condition.IN, "value1", "value2"), + buildCriterion( + "test2", + Condition.CONTAIN, + true, + "value3", + "value4")))))))); assertEquals(ViewUtils.mapDefinition(input, mock(AspectRetriever.class)), expectedResult); } diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java index 685cccf27ccc0..8f3750c39f670 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/view/DataHubViewTypeTest.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.types.view; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.testng.Assert.*; @@ -25,7 +26,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.r2.RemoteInvocationException; @@ -74,13 +74,11 @@ public class DataHubViewTypeTest { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test") - .setCondition(Condition.EQUAL)))))))) + buildCriterion( + "test", + Condition.EQUAL, + "value1", + "value2")))))))) .setEntityTypes( new StringArray( ImmutableList.of( @@ -110,38 +108,30 @@ public class DataHubViewTypeTest { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test") - .setCondition(Condition.EQUAL), - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL)))), + buildCriterion( + "test", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2")))), new ConjunctiveCriterion() .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL), - new Criterion() - .setValues( - new StringArray( - ImmutableList.of( - "value1", "value2"))) - .setField("test2") - .setCondition(Condition.EQUAL)))))))) + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2"), + buildCriterion( + "test2", + Condition.EQUAL, + "value1", + "value2")))))))) .setEntityTypes( new StringArray( ImmutableList.of( diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java index e6213a164febf..c197c6fd9c610 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/browsepaths/BackfillBrowsePathsV2Step.java @@ -1,6 +1,9 @@ package com.linkedin.datahub.upgrade.system.browsepaths; import static com.linkedin.metadata.Constants.*; 
+import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; @@ -9,7 +12,6 @@ import com.linkedin.common.BrowsePathsV2; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; @@ -152,13 +154,10 @@ private String backfillBrowsePathsV2(String entityType, AuditStamp auditStamp, S private Filter backfillBrowsePathsV2Filter() { // Condition: has `browsePaths` AND does NOT have `browsePathV2` - Criterion missingBrowsePathV2 = new Criterion(); - missingBrowsePathV2.setCondition(Condition.IS_NULL); - missingBrowsePathV2.setField("browsePathV2"); + Criterion missingBrowsePathV2 = buildIsNullCriterion("browsePathV2"); + // Excludes entities without browsePaths - Criterion hasBrowsePathV1 = new Criterion(); - hasBrowsePathV1.setCondition(Condition.EXISTS); - hasBrowsePathV1.setField("browsePaths"); + Criterion hasBrowsePathV1 = buildExistsCriterion("browsePaths"); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingBrowsePathV2); @@ -177,13 +176,8 @@ private Filter backfillBrowsePathsV2Filter() { private Filter backfillDefaultBrowsePathsV2Filter() { // Condition: has default `browsePathV2` - Criterion hasDefaultBrowsePathV2 = new Criterion(); - hasDefaultBrowsePathV2.setCondition(Condition.EQUAL); - hasDefaultBrowsePathV2.setField("browsePathV2"); - StringArray values = new StringArray(); - values.add(DEFAULT_BROWSE_PATH_V2); - hasDefaultBrowsePathV2.setValues(values); - hasDefaultBrowsePathV2.setValue(DEFAULT_BROWSE_PATH_V2); // not used, but required field? 
+ Criterion hasDefaultBrowsePathV2 = + buildCriterion("browsePathV2", Condition.EQUAL, DEFAULT_BROWSE_PATH_V2); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(hasDefaultBrowsePathV2); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java index ad28e6b6382d4..42361ebe59b85 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/policyfields/BackfillPolicyFieldsStep.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.upgrade.system.policyfields; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; @@ -17,7 +18,6 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.boot.BootstrapStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -246,9 +246,7 @@ private Optional> ingestPolicyFields( @NotNull private static ConjunctiveCriterion getCriterionForMissingField(String field) { - final Criterion missingPrivilegesField = new Criterion(); - missingPrivilegesField.setCondition(Condition.IS_NULL); - missingPrivilegesField.setField(field); + final Criterion missingPrivilegesField = buildIsNullCriterion(field); final CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingPrivilegesField); diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index ea53c2b470060..d002331e3929e 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,8 +20,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe ### Breaking Changes -- Metadata service authentication enabled by default -- Rest API authorization enabled by default +- #11486 - Deprecated Criterion filters using `value`. Use `values` instead. This also deprecates the ability to use comma delimited string to represent multiple values using `value`. 
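For illustration only (editorial note, not part of this patch): a minimal sketch of the old and new construction styles, assuming the `CriterionUtils` helper overloads inferred from the call sites changed in this PR: `buildCriterion(field, condition, values...)`, an overload taking a leading `negated` flag, plus `buildExistsCriterion(field)` and `buildIsNullCriterion(field)`.

```java
// Editorial sketch only; these helpers and overload shapes are inferred from the call
// sites in this patch, not from CriterionUtils' published Javadoc.
import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion;
import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion;

import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.Criterion;

class CriterionMigrationSketch {
  void examples() {
    // Deprecated: a single `value`, optionally comma-delimited to express several values.
    // new Criterion().setField("platform").setCondition(Condition.EQUAL).setValue("hive,kafka");

    // Replacement: every value is passed explicitly and lands in `values`.
    Criterion platforms = buildCriterion("platform", Condition.EQUAL, "hive", "kafka");

    // Negated variant (field, condition, negated, values...).
    Criterion notDev = buildCriterion("origin", Condition.EQUAL, true, "DEV");

    // Presence checks replace hand-built EXISTS / IS_NULL criteria.
    Criterion hasBrowsePaths = buildExistsCriterion("browsePaths");
    Criterion missingBrowsePathV2 = buildIsNullCriterion("browsePathV2");
  }
}
```

Both `buildCriterion` forms fill the plural `values` field, so callers no longer write `setValue` by hand; multiple values are expressed by the list itself rather than by comma splitting.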
+- #11484 - Metadata service authentication enabled by default +- #11484 - Rest API authorization enabled by default ### Potential Downtime diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java index 352e89baefc25..1c4a986e24b04 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/dgraph/DgraphGraphService.java @@ -367,7 +367,7 @@ protected static String getQueryForRelatedEntities( filters.add( String.format( "%s as var(func: eq(<%s>, \"%s\"))", - sourceFilterName, criterion.getField(), criterion.getValue())); + sourceFilterName, criterion.getField(), criterion.getValues().get(0))); }); } @@ -384,7 +384,7 @@ protected static String getQueryForRelatedEntities( filters.add( String.format( "%s as var(func: eq(<%s>, \"%s\"))", - sourceFilterName, criterion.getField(), criterion.getValue())); + sourceFilterName, criterion.getField(), criterion.getValues().get(0))); }); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index 40fa79a0ef171..8c7f0e3256cf8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -123,9 +123,9 @@ private static void addFilterToQueryBuilder( criterionArray.forEach( criterion -> andQuery.filter( - QueryBuilders.termQuery( + QueryBuilders.termsQuery( (node == null ? "" : node + ".") + criterion.getField(), - criterion.getValue()))); + criterion.getValues()))); orQuery.should(andQuery); } rootQuery.filter(orQuery); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java index 1769c53e4cd9b..1068fae9478e1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphService.java @@ -3,12 +3,12 @@ import static com.linkedin.metadata.aspect.models.graph.Edge.*; import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusFieldName; import static com.linkedin.metadata.graph.elastic.GraphFilterUtils.getUrnStatusQuery; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.EdgeUrnType; import com.linkedin.metadata.aspect.models.graph.RelatedEntities; @@ -229,11 +229,7 @@ public EntityLineageResult getLineage( private static Filter createUrnFilter(@Nonnull final Urn urn) { Filter filter = new Filter(); CriterionArray criterionArray = new CriterionArray(); - Criterion criterion = new Criterion(); - criterion.setCondition(Condition.EQUAL); - criterion.setField("urn"); - criterion.setValue(urn.toString()); - criterion.setValues(new StringArray(urn.toString())); + Criterion criterion = buildCriterion("urn", Condition.EQUAL, urn.toString()); 
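Editorial aside (not part of the patch): on the query side, the same shift shows up as `termQuery` becoming `termsQuery` in the ESGraphQueryDAO hunk above, so a criterion's whole `values` list is matched at once. A minimal sketch of that translation, with hypothetical class and method names:

```java
// Editorial sketch, not code from this patch: it mirrors the ESGraphQueryDAO change above,
// where the full `values` list of a criterion now feeds an OpenSearch terms query instead
// of a single-value term query. Class and method names here are hypothetical.
import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion;

import com.linkedin.metadata.query.filter.Condition;
import com.linkedin.metadata.query.filter.Criterion;
import org.opensearch.index.query.BoolQueryBuilder;
import org.opensearch.index.query.QueryBuilders;

class TermsQuerySketch {
  static BoolQueryBuilder toFilterQuery(Criterion criterion) {
    // Every entry in `values` is matched in one terms clause; no comma splitting of a
    // single `value` string is involved anymore.
    return QueryBuilders.boolQuery()
        .filter(QueryBuilders.termsQuery(criterion.getField(), criterion.getValues()));
  }

  static BoolQueryBuilder example() {
    return toFilterQuery(buildCriterion("origin", Condition.EQUAL, "PROD", "DEV"));
  }
}
```

A criterion built from a single value, like the urn filter being constructed here, is simply the one-element case of the same terms query.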
criterionArray.add(criterion); filter.setOr( new ConjunctiveCriterionArray( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java index 75d993f52680a..ef748ebd23278 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/neo4j/Neo4jGraphService.java @@ -489,7 +489,7 @@ public RelatedEntitiesResult findRelatedEntities( // Create a URN from the String. Only proceed if srcCriteria is not null or empty if (StringUtils.isNotEmpty(srcCriteria)) { final String urnValue = - sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValues().get(0).toString(); try { final Urn urn = Urn.createFromString(urnValue); srcNodeLabel = urn.getEntityType(); @@ -840,7 +840,8 @@ private static String criterionToString(@Nonnull CriterionArray criterionArray) final StringJoiner joiner = new StringJoiner(",", "{", "}"); criterionArray.forEach( - criterion -> joiner.add(toCriterionString(criterion.getField(), criterion.getValue()))); + criterion -> + joiner.add(toCriterionString(criterion.getField(), criterion.getValues().get(0)))); return joiner.length() <= 2 ? "" : joiner.toString(); } @@ -943,7 +944,7 @@ public RelatedEntitiesScrollResult scrollRelatedEntities( // Create a URN from the String. Only proceed if srcCriteria is not null or empty if (StringUtils.isNotEmpty(srcCriteria)) { final String urnValue = - sourceEntityFilter.getOr().get(0).getAnd().get(0).getValue().toString(); + sourceEntityFilter.getOr().get(0).getAnd().get(0).getValues().get(0).toString(); try { final Urn urn = Urn.createFromString(urnValue); srcNodeLabel = urn.getEntityType(); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index ec9c44e42f7f4..67ebdf8882b80 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -26,7 +26,6 @@ import com.linkedin.metadata.query.GroupingSpec; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -358,14 +357,14 @@ LineageSearchResult getLightningSearchResult( .map(ConjunctiveCriterion::getAnd) .flatMap(CriterionArray::stream) .filter(criterion -> "platform".equals(criterion.getField())) - .map(Criterion::getValue) + .flatMap(criterion -> criterion.getValues().stream()) .collect(Collectors.toSet()); originCriteriaValues = inputFilters.getOr().stream() .map(ConjunctiveCriterion::getAnd) .flatMap(CriterionArray::stream) .filter(criterion -> "origin".equals(criterion.getField())) - .map(Criterion::getValue) + .flatMap(criterion -> criterion.getValues().stream()) .collect(Collectors.toSet()); } boolean isNotFiltered = diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java index f1c42a1d277da..61bba11098fae 100644 --- 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESBrowseDAO.java @@ -125,7 +125,7 @@ public BrowseResult browse( @Nullable Filter filters, int from, int size) { - final Map requestMap = SearchUtils.getRequestMap(filters); + final Map> requestMap = SearchUtils.getRequestMap(filters); final OperationContext finalOpContext = opContext.withSearchFlags( @@ -213,7 +213,7 @@ protected SearchRequest constructGroupsSearchRequest( @Nonnull OperationContext opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap) { + @Nonnull Map> requestMap) { final SearchRequest searchRequest = new SearchRequest(indexName); final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); searchSourceBuilder.size(0); @@ -235,7 +235,7 @@ protected SearchRequest constructGroupsSearchRequest( private QueryBuilder buildQueryString( @Nonnull OperationContext opContext, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, boolean isGroupQuery) { final int browseDepthVal = getPathDepth(path); @@ -253,7 +253,7 @@ private QueryBuilder buildQueryString( queryBuilder.filter(QueryBuilders.termQuery(BROWSE_PATH_DEPTH, browseDepthVal)); } - requestMap.forEach((field, val) -> queryBuilder.filter(QueryBuilders.termQuery(field, val))); + requestMap.forEach((field, vals) -> queryBuilder.filter(QueryBuilders.termsQuery(field, vals))); return queryBuilder; } @@ -272,7 +272,7 @@ SearchRequest constructEntitiesSearchRequest( @Nonnull OperationContext opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, int from, int size) { final SearchRequest searchRequest = new SearchRequest(indexName); @@ -302,7 +302,7 @@ SearchRequest constructEntitiesSearchRequest( @Nonnull OperationContext opContext, @Nonnull String indexName, @Nonnull String path, - @Nonnull Map requestMap, + @Nonnull Map> requestMap, @Nullable Object[] sort, @Nullable String pitId, @Nonnull String keepAlive, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java index 800d59bacc1d8..367705d369c7c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/filter/BaseQueryFilterRewriter.java @@ -1,14 +1,15 @@ package com.linkedin.metadata.search.elasticsearch.query.filter; import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; -import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.metadata.aspect.GraphRetriever; import com.linkedin.metadata.aspect.models.graph.Edge; import com.linkedin.metadata.aspect.models.graph.RelatedEntitiesScrollResult; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.RelationshipDirection; import com.linkedin.metadata.search.utils.QueryUtils; import io.datahubproject.metadata.context.OperationContext; @@ -209,7 +210,7 @@ private static void scrollGraph( 
graphRetriever.consumeRelatedEntities( consumer, entityTypes, - QueryUtils.newDisjunctiveFilter(newCriterion("urn", queryUrnStrs)), + QueryUtils.newDisjunctiveFilter(buildCriterion("urn", Condition.EQUAL, queryUrnStrs)), entityTypes, EMPTY_FILTER, relationshipTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java index fa2eef964e006..39f69ed1716ab 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilder.java @@ -421,8 +421,6 @@ private void addCriterionFiltersToAggregationMetadata( .forEach( value -> addMissingAggregationValueToAggregationMetadata(value, originalAggMetadata)); - } else { - addMissingAggregationValueToAggregationMetadata(criterion.getValue(), originalAggMetadata); } } else { /* diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java index 78bb8cb1e41ae..ace7fa2bc197c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java @@ -8,7 +8,7 @@ import static com.linkedin.metadata.search.elasticsearch.indexbuilder.MappingsBuilder.SUBFIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.KEYWORD_FIELDS; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchFieldConfig.PATH_HIERARCHY_FIELDS; -import static com.linkedin.metadata.search.utils.SearchUtils.isUrn; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.aspect.AspectRetriever; @@ -25,7 +25,6 @@ import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriteChain; import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext; import io.datahubproject.metadata.context.OperationContext; -import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -177,9 +176,7 @@ public static BoolQueryBuilder buildFilterQuery( .getCriteria() .forEach( criterion -> { - if (!criterion.getValue().trim().isEmpty() - || criterion.hasValues() - || criterion.getCondition() == Condition.IS_NULL) { + if (criterion.hasValues() || criterion.getCondition() == Condition.IS_NULL) { andQueryBuilder.must( getQueryBuilderFromCriterion( criterion, @@ -209,7 +206,6 @@ public static BoolQueryBuilder buildConjunctiveFilterQuery( .forEach( criterion -> { if (Set.of(Condition.EXISTS, Condition.IS_NULL).contains(criterion.getCondition()) - || (criterion.hasValue() && !criterion.getValue().trim().isEmpty()) || criterion.hasValues()) { if (!criterion.isNegated()) { // `filter` instead of `must` (enables caching and bypasses scoring) @@ -539,20 +535,13 @@ private static QueryBuilder getQueryBuilderFromCriterionForFieldToExpand( @Nonnull QueryFilterRewriteChain queryFilterRewriteChain) { final BoolQueryBuilder orQueryBuilder = new BoolQueryBuilder(); for (String field : fields) { - Criterion criterionToQuery = new Criterion(); - criterionToQuery.setCondition(criterion.getCondition()); - criterionToQuery.setNegated(criterion.isNegated()); - if 
(criterion.hasValues()) { - criterionToQuery.setValues(criterion.getValues()); - } - if (criterion.hasValue()) { - criterionToQuery.setValue(criterion.getValue()); - } - criterionToQuery.setField( - toKeywordField(field, isTimeseries, opContext.getAspectRetriever())); orQueryBuilder.should( getQueryBuilderFromCriterionForSingleField( - criterionToQuery, + buildCriterion( + toKeywordField(field, isTimeseries, opContext.getAspectRetriever()), + criterion.getCondition(), + criterion.isNegated(), + criterion.getValues()), isTimeseries, searchableFieldTypes, null, @@ -583,7 +572,7 @@ private static QueryBuilder getQueryBuilderFromCriterionForSingleField( return QueryBuilders.boolQuery() .must(QueryBuilders.existsQuery(fieldName)) .queryName(queryName != null ? queryName : fieldName); - } else if (criterion.hasValues() || criterion.hasValue()) { + } else if (criterion.hasValues()) { if (condition == Condition.EQUAL) { return buildEqualsConditionFromCriterion( fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever) @@ -643,21 +632,6 @@ private static QueryBuilder buildWildcardQueryWithMultipleValues( return boolQuery; } - private static QueryBuilder buildWildcardQueryWithSingleValue( - @Nonnull final String fieldName, - @Nonnull final Criterion criterion, - final boolean isTimeseries, - @Nullable String queryName, - @Nonnull AspectRetriever aspectRetriever, - String wildcardPattern) { - return QueryBuilders.wildcardQuery( - toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), - String.format( - wildcardPattern, ESUtils.escapeReservedCharacters(criterion.getValue().trim()))) - .queryName(queryName != null ? queryName : fieldName) - .caseInsensitive(true); - } - private static QueryBuilder buildContainsConditionFromCriterion( @Nonnull final String fieldName, @Nonnull final Criterion criterion, @@ -665,11 +639,7 @@ private static QueryBuilder buildContainsConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s*"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s*"); } @@ -680,11 +650,7 @@ private static QueryBuilder buildStartsWithConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "%s*"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( fieldName, criterion, isTimeseries, queryName, aspectRetriever, "%s*"); } @@ -695,11 +661,7 @@ private static QueryBuilder buildEndsWithConditionFromCriterion( final boolean isTimeseries, @Nonnull AspectRetriever aspectRetriever) { - if (!criterion.getValues().isEmpty()) { - return buildWildcardQueryWithMultipleValues( - fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s"); - } - return buildWildcardQueryWithSingleValue( + return buildWildcardQueryWithMultipleValues( fieldName, criterion, isTimeseries, queryName, aspectRetriever, "*%s"); } @@ -709,19 +671,8 @@ private static QueryBuilder buildEqualsConditionFromCriterion( final boolean isTimeseries, final Map> searchableFieldTypes, @Nonnull AspectRetriever aspectRetriever) { - /* - * If the newer 'values' field of 
Criterion.pdl is set, then we - * handle using the following code to allow multi-match. - */ - if (!criterion.getValues().isEmpty()) { - return buildEqualsConditionFromCriterionWithValues( - fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); - } - /* - * Otherwise, we are likely using the deprecated 'value' field. - * We handle using the legacy code path below. - */ - return buildEqualsFromCriterionWithValue(fieldName, criterion, isTimeseries, aspectRetriever); + return buildEqualsConditionFromCriterionWithValues( + fieldName, criterion, isTimeseries, searchableFieldTypes, aspectRetriever); } /** @@ -795,12 +746,7 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( // Determine criterion value, range query only accepts single value so take first value in // values if multiple - String criterionValueString; - if (!criterion.getValues().isEmpty()) { - criterionValueString = criterion.getValues().get(0).trim(); - } else { - criterionValueString = criterion.getValue().trim(); - } + String criterionValueString = criterion.getValues().get(0).trim(); Object criterionValue; String documentFieldName; if (fieldTypes.contains(BOOLEAN_FIELD_TYPE)) { @@ -829,48 +775,6 @@ private static RangeQueryBuilder buildRangeQueryFromCriterion( } } - /** - * Builds an instance of {@link QueryBuilder} representing an EQUALS condition which was created - * using the deprecated 'value' field of Criterion.pdl model. - * - *
<p>Previously, we supported comma-separate values inside of a single string field, thus we have
- * to account for splitting and matching against each value below.
- *
- * <p>
For all new code, we should be using the new 'values' field for performing multi-match. This - * is simply retained for backwards compatibility of the search API. - */ - @Deprecated - private static QueryBuilder buildEqualsFromCriterionWithValue( - @Nonnull final String fieldName, - @Nonnull final Criterion criterion, - final boolean isTimeseries, - @Nonnull AspectRetriever aspectRetriever) { - // If the value is an URN style value, then we do not attempt to split it by comma (for obvious - // reasons) - if (isUrn(criterion.getValue())) { - return QueryBuilders.matchQuery( - toKeywordField(criterion.getField(), isTimeseries, aspectRetriever), - criterion.getValue().trim()) - .queryName(fieldName) - .analyzer(KEYWORD_ANALYZER); - } - final BoolQueryBuilder filters = new BoolQueryBuilder(); - // Cannot assume the existence of a .keyword or other subfield (unless contains `.`) - // Cannot assume the type of the underlying field or subfield thus KEYWORD_ANALYZER is forced - List fields = - criterion.getField().contains(".") - ? List.of(criterion.getField()) - : List.of(criterion.getField(), criterion.getField() + ".*"); - Arrays.stream(criterion.getValue().trim().split("\\s*,\\s*")) - .forEach( - elem -> - filters.should( - QueryBuilders.multiMatchQuery(elem, fields.toArray(new String[0])) - .queryName(fieldName) - .analyzer(KEYWORD_ANALYZER))); - return filters; - } - @Nonnull public static BoolQueryBuilder applyDefaultSearchFilters( @Nonnull OperationContext opContext, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java index add2b1526ab67..4f71a87ca16a8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/utils/SearchUtils.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.Collections; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Predicate; @@ -42,7 +43,7 @@ private SearchUtils() {} * @return a request map */ @Nonnull - public static Map getRequestMap(@Nullable Filter requestParams) { + public static Map> getRequestMap(@Nullable Filter requestParams) { if (requestParams == null) { return Collections.emptyMap(); } @@ -67,7 +68,7 @@ public static Map getRequestMap(@Nullable Filter requestParams) }); return criterionArray.stream() - .collect(Collectors.toMap(Criterion::getField, Criterion::getValue)); + .collect(Collectors.toMap(Criterion::getField, Criterion::getValues)); } public static boolean isUrn(@Nonnull String value) { diff --git a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java index 41addbe197f27..134c65d2b5fae 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffect.java @@ -4,6 +4,7 @@ import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX; +import static 
com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -20,7 +21,6 @@ import com.linkedin.metadata.entity.ebean.batch.PatchItemImpl; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.StructuredPropertyUtils; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -170,12 +170,11 @@ private Filter getFilter() { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion propertyExistsCriterion = new Criterion(); // Cannot rely on automatic field name since the definition is deleted - propertyExistsCriterion.setField( - STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX - + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); - propertyExistsCriterion.setCondition(Condition.EXISTS); + final Criterion propertyExistsCriterion = + buildExistsCriterion( + STRUCTURED_PROPERTY_MAPPING_FIELD_PREFIX + + StructuredPropertyUtils.toElasticsearchFieldName(propertyUrn, definition)); andCriterion.add(propertyExistsCriterion); conjunction.setAnd(andCriterion); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java index cb364f41aa218..67518121edae4 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/ElasticSearchTimeseriesAspectService.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.timeseries.elastic; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.codahale.metrics.Timer; import com.datahub.util.RecordUtils; @@ -339,20 +340,21 @@ public List getAspectValues( filterQueryBuilder.mustNot(QueryBuilders.termQuery(MappingsBuilder.IS_EXPLODED_FIELD, true)); if (startTimeMillis != null) { Criterion startTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.GREATER_THAN_OR_EQUAL_TO, + startTimeMillis.toString()); filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(endTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.LESS_THAN_OR_EQUAL_TO, + endTimeMillis.toString()); + filterQueryBuilder.must( ESUtils.getQueryBuilderFromCriterion( endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); @@ -575,20 +577,21 @@ public TimeseriesScrollResult scrollAspects( if (startTimeMillis != null) { Criterion startTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - 
.setValue(startTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.GREATER_THAN_OR_EQUAL_TO, + startTimeMillis.toString()); + filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( startTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); } if (endTimeMillis != null) { Criterion endTimeCriterion = - new Criterion() - .setField(MappingsBuilder.TIMESTAMP_MILLIS_FIELD) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(endTimeMillis.toString()); + buildCriterion( + MappingsBuilder.TIMESTAMP_MILLIS_FIELD, + Condition.LESS_THAN_OR_EQUAL_TO, + endTimeMillis.toString()); filterQueryBuilder.filter( ESUtils.getQueryBuilderFromCriterion( endTimeCriterion, true, searchableFieldTypes, opContext, queryFilterRewriteChain)); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java index abeefae3cf39f..54f97f45219ac 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/timeseries/elastic/UsageServiceUtil.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.codahale.metrics.Timer; import com.fasterxml.jackson.core.JsonProcessingException; @@ -97,26 +98,19 @@ public static UsageQueryResult query( // 1. Populate the filter. This is common for all queries. Filter filter = new Filter(); ArrayList criteria = new ArrayList<>(); - Criterion hasUrnCriterion = - new Criterion() - .setField("urn") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(resource)); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, resource); + criteria.add(hasUrnCriterion); if (startTime != null) { Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValues(new StringArray(startTime.toString())); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + criteria.add(startTimeCriterion); } if (endTime != null) { Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValues(new StringArray(endTime.toString())); + buildCriterion(ES_FIELD_TIMESTAMP, Condition.LESS_THAN_OR_EQUAL_TO, endTime.toString()); criteria.add(endTimeCriterion); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java index 99520c189034a..a3c21e45540f9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationCandidateSourceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.recommendation.candidatesource; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.*; import static org.mockito.Mockito.*; import static org.testng.Assert.assertEquals; @@ 
-12,7 +13,7 @@ import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.recommendation.RecommendationContent; import com.linkedin.metadata.recommendation.RecommendationParams; @@ -142,7 +143,7 @@ public void testWhenSearchServiceReturnsValueResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testValue").setValue("value1")); + buildCriterion("testValue", Condition.EQUAL, "value1")); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 1L); assertTrue( @@ -165,7 +166,7 @@ public void testWhenSearchServiceReturnsValueResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testValue").setValue("value3")); + buildCriterion("testValue", Condition.EQUAL, "value3")); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 3L); content = candidates.get(1); @@ -178,7 +179,7 @@ public void testWhenSearchServiceReturnsValueResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testValue").setValue("value2")); + buildCriterion("testValue", Condition.EQUAL, "value2")); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 2L); assertTrue( @@ -208,7 +209,7 @@ public void testWhenSearchServiceReturnsUrnResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testUrn").setValue(testUrn1.toString())); + buildCriterion("testUrn", Condition.EQUAL, testUrn1.toString())); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 1L); assertTrue( @@ -233,7 +234,7 @@ public void testWhenSearchServiceReturnsUrnResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testUrn").setValue(testUrn3.toString())); + buildCriterion("testUrn", Condition.EQUAL, testUrn3.toString())); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 3L); content = candidates.get(1); @@ -246,7 +247,7 @@ public void testWhenSearchServiceReturnsUrnResults() { assertEquals(params.getSearchParams().getFilters().size(), 1); assertEquals( params.getSearchParams().getFilters().get(0), - new Criterion().setField("testUrn").setValue(testUrn2.toString())); + buildCriterion("testUrn", Condition.EQUAL, testUrn2.toString())); assertNotNull(params.getContentParams()); assertEquals(params.getContentParams().getCount().longValue(), 2L); assertTrue( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java index f689e8c98e3f1..d9268c1b50efe 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageServiceTestBase.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; @@ -942,14 +943,10 @@ public void testLightningSearchService() throws Exception { // Platform ConjunctiveCriterionArray conCritArr = new ConjunctiveCriterionArray(); Criterion platform1Crit = - new Criterion() - .setField("platform") - .setValue("urn:li:dataPlatform:kafka") - .setCondition(Condition.EQUAL); + buildCriterion("platform", Condition.EQUAL, "urn:li:dataPlatform:kafka"); CriterionArray critArr = new CriterionArray(ImmutableList.of(platform1Crit)); conCritArr.add(new ConjunctiveCriterion().setAnd(critArr)); - Criterion degreeCrit = - new Criterion().setField("degree").setValue("2").setCondition(Condition.EQUAL); + Criterion degreeCrit = buildCriterion("degree", Condition.EQUAL, "2"); conCritArr.add( new ConjunctiveCriterion().setAnd(new CriterionArray(ImmutableList.of(degreeCrit)))); Filter filter = new Filter().setOr(conCritArr); @@ -1125,12 +1122,12 @@ public void testLightningEnvFiltering() throws Exception { // Set up filters ConjunctiveCriterionArray conCritArr = new ConjunctiveCriterionArray(); - Criterion platform1Crit = - new Criterion().setField("platform").setValue(kafkaPlatform).setCondition(Condition.EQUAL); + Criterion platform1Crit = buildCriterion("platform", Condition.EQUAL, kafkaPlatform); + CriterionArray critArr = new CriterionArray(ImmutableList.of(platform1Crit)); conCritArr.add(new ConjunctiveCriterion().setAnd(critArr)); - Criterion originCrit = - new Criterion().setField("origin").setValue("DEV").setCondition(Condition.EQUAL); + Criterion originCrit = buildCriterion("origin", Condition.EQUAL, "DEV"); + conCritArr.add( new ConjunctiveCriterion().setAnd(new CriterionArray(ImmutableList.of(originCrit)))); @@ -1201,10 +1198,9 @@ public void testLightningPagination() throws Exception { // Set up filters ConjunctiveCriterionArray conCritArr = new ConjunctiveCriterionArray(); - Criterion platform1Crit = - new Criterion().setField("platform").setValue(kafkaPlatform).setCondition(Condition.EQUAL); - Criterion platform2Crit = - new Criterion().setField("platform").setValue(hivePlatform).setCondition(Condition.EQUAL); + Criterion platform1Crit = buildCriterion("platform", Condition.EQUAL, kafkaPlatform); + + Criterion platform2Crit = buildCriterion("platform", Condition.EQUAL, hivePlatform); CriterionArray critArr = new CriterionArray(ImmutableList.of(platform1Crit)); conCritArr.add(new ConjunctiveCriterion().setAnd(critArr)); critArr = new CriterionArray(ImmutableList.of(platform2Crit)); @@ -1340,19 +1336,15 @@ public void testCanDoLightning() throws Exception { // Set up filters ConjunctiveCriterionArray conCritArr = new ConjunctiveCriterionArray(); - Criterion platform1Crit = - new Criterion().setField("platform").setValue(kafkaPlatform).setCondition(Condition.EQUAL); - Criterion platform2Crit = - new Criterion().setField("platform").setValue(hivePlatform).setCondition(Condition.EQUAL); + Criterion platform1Crit = buildCriterion("platform", Condition.EQUAL, kafkaPlatform); + + Criterion platform2Crit = buildCriterion("platform", Condition.EQUAL, hivePlatform); + 
CriterionArray critArr = new CriterionArray(ImmutableList.of(platform1Crit)); conCritArr.add(new ConjunctiveCriterion().setAnd(critArr)); critArr = new CriterionArray(ImmutableList.of(platform2Crit)); conCritArr.add(new ConjunctiveCriterion().setAnd(critArr)); - Criterion originCrit = - new Criterion() - .setField("origin") - .setValue(FabricType.PROD.name()) - .setCondition(Condition.EQUAL); + Criterion originCrit = buildCriterion("origin", Condition.EQUAL, FabricType.PROD.name()); conCritArr.add( new ConjunctiveCriterion().setAnd(new CriterionArray(ImmutableList.of(originCrit)))); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java index 45bc8548706bb..ba83a381916c2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTestBase.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search; import static com.linkedin.metadata.Constants.ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.testng.Assert.assertEquals; @@ -11,7 +12,6 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; @@ -241,19 +241,9 @@ public void testSearchService() throws Exception { @Test public void testAdvancedSearchOr() throws Exception { - final Criterion filterCriterion = - new Criterion() - .setField("platform") - .setCondition(Condition.EQUAL) - .setValue("hive") - .setValues(new StringArray(ImmutableList.of("hive"))); - - final Criterion subtypeCriterion = - new Criterion() - .setField("subtypes") - .setCondition(Condition.EQUAL) - .setValue("") - .setValues(new StringArray(ImmutableList.of("view"))); + final Criterion filterCriterion = buildCriterion("platform", Condition.EQUAL, "hive"); + + final Criterion subtypeCriterion = buildCriterion("subtypes", Condition.EQUAL, "view"); final Filter filterWithCondition = new Filter() @@ -329,19 +319,9 @@ public void testAdvancedSearchOr() throws Exception { @Test public void testAdvancedSearchSoftDelete() throws Exception { - final Criterion filterCriterion = - new Criterion() - .setField("platform") - .setCondition(Condition.EQUAL) - .setValue("hive") - .setValues(new StringArray(ImmutableList.of("hive"))); - - final Criterion removedCriterion = - new Criterion() - .setField("removed") - .setCondition(Condition.EQUAL) - .setValue("") - .setValues(new StringArray(ImmutableList.of("true"))); + final Criterion filterCriterion = buildCriterion("platform", Condition.EQUAL, "hive"); + + final Criterion removedCriterion = buildCriterion("removed", Condition.EQUAL, "true"); final Filter filterWithCondition = new Filter() @@ -419,13 +399,7 @@ public void testAdvancedSearchSoftDelete() throws Exception { @Test public void testAdvancedSearchNegated() throws Exception { - final Criterion filterCriterion = - new Criterion() - .setField("platform") - .setCondition(Condition.EQUAL) - .setValue("hive") - .setNegated(true) - .setValues(new 
StringArray(ImmutableList.of("hive"))); + final Criterion filterCriterion = buildCriterion("platform", Condition.EQUAL, true, "hive"); final Filter filterWithCondition = new Filter() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java index f53088bdffc90..1ebcc03eb690b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/GoldenTestBase.java @@ -1,18 +1,18 @@ package com.linkedin.metadata.search.fixtures; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static io.datahubproject.test.search.SearchTestUtils.searchAcrossEntities; import static org.testng.Assert.*; import static org.testng.AssertJUnit.assertNotNull; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.entitytype.EntityTypeMapper; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.search.MatchedFieldArray; @@ -205,10 +205,7 @@ public void testFilterOnCountField() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("rowCount") - .setValue("") - .setValues(new StringArray(ImmutableList.of("68")))))))); + buildCriterion("rowCount", Condition.EQUAL, "68")))))); SearchResult searchResult = SearchTestUtils.facetAcrossEntities( getOperationContext(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java index 6a48dc19b029a..8cb0678180ccb 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/fixtures/SampleDataFixtureTestBase.java @@ -3,6 +3,7 @@ import static com.linkedin.metadata.Constants.DATASET_ENTITY_NAME; import static com.linkedin.metadata.Constants.DATA_JOB_ENTITY_NAME; import static com.linkedin.metadata.search.elasticsearch.query.request.SearchQueryBuilder.STRUCTURED_QUERY_PREFIX; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; import static io.datahubproject.test.search.SearchTestUtils.*; import static org.testng.Assert.assertEquals; @@ -14,7 +15,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.generated.AutoCompleteResults; import com.linkedin.datahub.graphql.types.chart.ChartType; import com.linkedin.datahub.graphql.types.container.ContainerType; @@ -1464,10 +1464,7 @@ public void testPlatformTest() { Filter filter = new Filter(); ArrayList criteria = new ArrayList<>(); Criterion hasPlatformCriterion = - new Criterion() - .setField(fieldName) - 
.setCondition(Condition.EQUAL) - .setValue(testPlatform); + buildCriterion(fieldName, Condition.EQUAL, testPlatform); criteria.add(hasPlatformCriterion); filter.setOr( new ConjunctiveCriterionArray( @@ -2000,10 +1997,7 @@ public void testFilterOnHasValuesField() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("hasOwners") - .setValue("") - .setValues(new StringArray(ImmutableList.of("true")))))))); + buildCriterion("hasOwners", Condition.EQUAL, "true")))))); SearchResult searchResult = searchAcrossEntities( getOperationContext(), @@ -2025,10 +2019,7 @@ public void testFilterOnNumValuesField() { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("numInputDatasets") - .setValue("") - .setValues(new StringArray(ImmutableList.of("1")))))))); + buildCriterion("numInputDatasets", Condition.EQUAL, "1")))))); SearchResult searchResult = searchAcrossEntities( getOperationContext(), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java index e0258f0593339..eafe5c7b5c310 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/SearchDAOTestBase.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.search.query; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static com.linkedin.metadata.utils.SearchUtil.AGGREGATION_SEPARATOR_CHAR; import static com.linkedin.metadata.utils.SearchUtil.ES_INDEX_FIELD; import static org.testng.Assert.assertEquals; @@ -9,9 +10,7 @@ import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; -import com.google.common.collect.ImmutableList; import com.linkedin.data.template.LongMap; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; @@ -51,12 +50,7 @@ public abstract class SearchDAOTestBase extends AbstractTestNGSpringContextTests @Test public void testTransformFilterForEntitiesNoChange() { Criterion c = - new Criterion() - .setValue("urn:li:tag:abc") - .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("tags.keyword"); + buildCriterion("tags.keyword", Condition.EQUAL, "urn:ki:tag:abc", "urn:li:tag:def"); Filter f = new Filter() @@ -82,13 +76,7 @@ public void testTransformFilterForEntitiesNullFilter() { @Test public void testTransformFilterForEntitiesWithChanges() { - Criterion c = - new Criterion() - .setValue("dataset") - .setValues(new StringArray(ImmutableList.of("dataset"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("_entityType"); + Criterion c = buildCriterion("_entityType", Condition.EQUAL, "dataset"); Filter f = new Filter() @@ -109,12 +97,7 @@ public void testTransformFilterForEntitiesWithChanges() { assertNotEquals(originalF, transformedFilter); Criterion expectedNewCriterion = - new Criterion() - .setValue("smpldat_datasetindex_v2") - .setValues(new StringArray(ImmutableList.of("smpldat_datasetindex_v2"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField(ES_INDEX_FIELD); + buildCriterion(ES_INDEX_FIELD, Condition.EQUAL, "smpldat_datasetindex_v2"); Filter 
expectedNewFilter = new Filter() @@ -128,13 +111,7 @@ public void testTransformFilterForEntitiesWithChanges() { @Test public void testTransformFilterForEntitiesWithUnderscore() { - Criterion c = - new Criterion() - .setValue("data_job") - .setValues(new StringArray(ImmutableList.of("data_job"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("_entityType"); + Criterion c = buildCriterion("_entityType", Condition.EQUAL, "data_job"); Filter f = new Filter() @@ -155,12 +132,7 @@ public void testTransformFilterForEntitiesWithUnderscore() { assertNotEquals(originalF, transformedFilter); Criterion expectedNewCriterion = - new Criterion() - .setValue("smpldat_datajobindex_v2") - .setValues(new StringArray(ImmutableList.of("smpldat_datajobindex_v2"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField(ES_INDEX_FIELD); + buildCriterion(ES_INDEX_FIELD, Condition.EQUAL, "smpldat_datajobindex_v2"); Filter expectedNewFilter = new Filter() @@ -174,20 +146,10 @@ public void testTransformFilterForEntitiesWithUnderscore() { @Test public void testTransformFilterForEntitiesWithSomeChanges() { - Criterion criterionChanged = - new Criterion() - .setValue("dataset") - .setValues(new StringArray(ImmutableList.of("dataset"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("_entityType"); + Criterion criterionChanged = buildCriterion("_entityType", Condition.EQUAL, "dataset"); + Criterion criterionUnchanged = - new Criterion() - .setValue("urn:li:tag:abc") - .setValues(new StringArray(ImmutableList.of("urn:li:tag:abc", "urn:li:tag:def"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField("tags.keyword"); + buildCriterion("tags.keyword", Condition.EQUAL, "urn:li:tag:abc", "urn:li:tag:def"); Filter f = new Filter() @@ -209,12 +171,7 @@ public void testTransformFilterForEntitiesWithSomeChanges() { assertNotEquals(originalF, transformedFilter); Criterion expectedNewCriterion = - new Criterion() - .setValue("smpldat_datasetindex_v2") - .setValues(new StringArray(ImmutableList.of("smpldat_datasetindex_v2"))) - .setNegated(false) - .setCondition(Condition.EQUAL) - .setField(ES_INDEX_FIELD); + buildCriterion(ES_INDEX_FIELD, Condition.EQUAL, "smpldat_datasetindex_v2"); Filter expectedNewFilter = new Filter() diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java index 2c49567d49ea7..f91e3a28f1bd6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/ContainerExpansionRewriterTest.java @@ -2,8 +2,8 @@ import static com.linkedin.metadata.Constants.CONTAINER_ENTITY_NAME; import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; -import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.isNull; @@ -114,7 +114,9 @@ public void testTermsQueryRewrite() { // Setup nested when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(CONTAINER_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq( + 
QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(childUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -161,7 +163,9 @@ public void testTermsQueryRewritePagination() { // Page 1 when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(CONTAINER_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(childUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -183,7 +187,9 @@ public void testTermsQueryRewritePagination() { // Page 2 when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(CONTAINER_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(childUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -206,7 +212,7 @@ public void testTermsQueryRewritePagination() { eq(List.of(CONTAINER_ENTITY_NAME)), eq( QueryUtils.newDisjunctiveFilter( - newCriterion("urn", List.of(parentUrn2, parentUrn)))), + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn2, parentUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -233,7 +239,7 @@ public void testTermsQueryRewritePagination() { eq(List.of(CONTAINER_ENTITY_NAME)), eq( QueryUtils.newDisjunctiveFilter( - newCriterion("urn", List.of(parentUrn2, parentUrn)))), + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn2, parentUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -284,7 +290,9 @@ public void testNestedBoolQueryRewrite() { // Setup nested container when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(CONTAINER_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(childUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(childUrn)))), eq(List.of(CONTAINER_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java index 8ee7dd3718ca9..76e650f405456 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/filter/DomainExpansionRewriterTest.java @@ -2,8 +2,8 @@ import static com.linkedin.metadata.Constants.DOMAIN_ENTITY_NAME; import static com.linkedin.metadata.search.utils.QueryUtils.EMPTY_FILTER; -import static com.linkedin.metadata.search.utils.QueryUtils.newCriterion; import static com.linkedin.metadata.search.utils.QueryUtils.newRelationshipFilter; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.isNull; @@ -114,7 +114,9 @@ public void testTermsQueryRewrite() { // Setup nested when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(DOMAIN_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), 
eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -161,7 +163,9 @@ public void testTermsQueryRewritePagination() { // Page 1 when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(DOMAIN_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(grandParentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -187,7 +191,9 @@ public void testTermsQueryRewritePagination() { // Page 2 when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(DOMAIN_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(grandParentUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(grandParentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -214,7 +220,7 @@ public void testTermsQueryRewritePagination() { eq(List.of(DOMAIN_ENTITY_NAME)), eq( QueryUtils.newDisjunctiveFilter( - newCriterion("urn", List.of(parentUrn2, parentUrn)))), + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn2, parentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -237,7 +243,7 @@ public void testTermsQueryRewritePagination() { eq(List.of(DOMAIN_ENTITY_NAME)), eq( QueryUtils.newDisjunctiveFilter( - newCriterion("urn", List.of(parentUrn2, parentUrn)))), + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn2, parentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), @@ -285,7 +291,9 @@ public void testNestedBoolQueryRewrite() { // Setup nested when(mockGraphRetriever.scrollRelatedEntities( eq(List.of(DOMAIN_ENTITY_NAME)), - eq(QueryUtils.newDisjunctiveFilter(newCriterion("urn", List.of(parentUrn)))), + eq( + QueryUtils.newDisjunctiveFilter( + buildCriterion("urn", Condition.EQUAL, List.of(parentUrn)))), eq(List.of(DOMAIN_ENTITY_NAME)), eq(EMPTY_FILTER), eq(List.of("IsPartOf")), diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index aec86c887e4df..a90c0291f53b8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -1,5 +1,8 @@ package com.linkedin.metadata.search.query.request; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SearchUtil.*; import static org.mockito.Mockito.mock; import static org.testng.Assert.*; @@ -36,7 +39,6 @@ import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.ExistsQueryBuilder; import org.opensearch.index.query.MatchQueryBuilder; -import org.opensearch.index.query.MultiMatchQueryBuilder; import org.opensearch.index.query.TermsQueryBuilder; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregationBuilders; @@ -368,8 +370,7 @@ public void testFilteredSearch() { private BoolQueryBuilder constructFilterQuery( SearchRequestHandler requestHandler, boolean scroll) { - final Criterion filterCriterion = - new 
Criterion().setField("keyword").setCondition(Condition.EQUAL).setValue("some value"); + final Criterion filterCriterion = buildCriterion("keyword", Condition.EQUAL, "some value"); final Filter filterWithoutRemovedCondition = new Filter() @@ -435,14 +436,9 @@ private void testFilterQuery(BoolQueryBuilder testQuery) { private BoolQueryBuilder constructRemovedQuery( SearchRequestHandler requestHandler, boolean scroll) { - final Criterion filterCriterion = - new Criterion().setField("keyword").setCondition(Condition.EQUAL).setValue("some value"); + final Criterion filterCriterion = buildCriterion("keyword", Condition.EQUAL, "some value"); - final Criterion removedCriterion = - new Criterion() - .setField("removed") - .setCondition(Condition.EQUAL) - .setValue(String.valueOf(false)); + final Criterion removedCriterion = buildCriterion("removed", Condition.EQUAL, "false"); final Filter filterWithRemovedCondition = new Filter() @@ -515,12 +511,7 @@ private void testRemovedQuery(BoolQueryBuilder queryWithRemoved) { // field EQUAL [value1, value2, ...] @Test public void testFilterFieldTagsByValues() { - final Criterion filterCriterion = - new Criterion() - .setField("fieldTags") - .setCondition(Condition.EQUAL) - .setValue("v1") - .setValues(new StringArray("v1", "v2")); + final Criterion filterCriterion = buildCriterion("fieldTags", Condition.EQUAL, "v1", "v2"); final BoolQueryBuilder testQuery = getQuery(filterCriterion); @@ -562,88 +553,11 @@ public void testFilterFieldTagsByValues() { } } - // For fields that are one of EDITABLE_FIELD_TO_QUERY_PAIRS, we want to make sure - // a filter that has a single value will result in one filter for each field in the - // pair of fields - @Test - public void testFilterFieldTagsByValue() { - final Criterion filterCriterion = - new Criterion().setField("fieldTags").setCondition(Condition.EQUAL).setValue("v1"); - - final BoolQueryBuilder testQuery = getQuery(filterCriterion); - - // bool -> must -> [bool] -> should -> [bool] -> must -> [bool] -> should -> [bool] -> should -> - // [match] - List matchQueryBuilders = - testQuery.filter().stream() - .filter(or -> or instanceof BoolQueryBuilder) - .flatMap(or -> ((BoolQueryBuilder) or).should().stream()) - .filter(should -> should instanceof BoolQueryBuilder) - .flatMap(should -> ((BoolQueryBuilder) should).filter().stream()) - .filter(must -> must instanceof BoolQueryBuilder) - .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) - .filter(should -> should instanceof BoolQueryBuilder) - .flatMap(should -> ((BoolQueryBuilder) should).should().stream()) - .filter(should -> should instanceof MultiMatchQueryBuilder) - .map(should -> (MultiMatchQueryBuilder) should) - .collect(Collectors.toList()); - - assertTrue(matchQueryBuilders.size() == 2, "Expected to find two match queries"); - Map matchMap = new HashMap<>(); - matchQueryBuilders.forEach( - matchQueryBuilder -> { - Set fields = matchQueryBuilder.fields().keySet(); - assertTrue(matchQueryBuilder.value() instanceof String); - fields.forEach(field -> matchMap.put(field, (String) matchQueryBuilder.value())); - }); - - assertTrue(matchMap.containsKey("fieldTags.keyword")); - assertTrue(matchMap.containsKey("editedFieldTags.keyword")); - for (String value : matchMap.values()) { - assertTrue(value.equals("v1")); - } - } - - // Test fields not in EDITABLE_FIELD_TO_QUERY_PAIRS with a single value - @Test - public void testFilterPlatformByValue() { - final Criterion filterCriterion = - new 
Criterion().setField("platform").setCondition(Condition.EQUAL).setValue("mysql"); - - final BoolQueryBuilder testQuery = getQuery(filterCriterion); - - // bool -> filter -> [bool] -> should -> [bool] -> filter -> [bool] -> should -> [match] - List matchQueryBuilders = - testQuery.filter().stream() - .filter(or -> or instanceof BoolQueryBuilder) - .flatMap(or -> ((BoolQueryBuilder) or).should().stream()) - .filter(should -> should instanceof BoolQueryBuilder) - .flatMap(should -> ((BoolQueryBuilder) should).filter().stream()) - .filter(must -> must instanceof BoolQueryBuilder) - .flatMap(must -> ((BoolQueryBuilder) must).should().stream()) - .filter(should -> should instanceof MultiMatchQueryBuilder) - .map(should -> (MultiMatchQueryBuilder) should) - .collect(Collectors.toList()); - - assertTrue(matchQueryBuilders.size() == 1, "Expected to find one match query"); - MultiMatchQueryBuilder matchQueryBuilder = matchQueryBuilders.get(0); - assertEquals( - matchQueryBuilder.fields(), - Map.of( - "platform", 1.0f, - "platform.*", 1.0f)); - assertEquals(matchQueryBuilder.value(), "mysql"); - } - // Test fields not in EDITABLE_FIELD_TO_QUERY_PAIRS with a list of values @Test public void testFilterPlatformByValues() { final Criterion filterCriterion = - new Criterion() - .setField("platform") - .setCondition(Condition.EQUAL) - .setValue("mysql") - .setValues(new StringArray("mysql", "bigquery")); + buildCriterion("platform", Condition.EQUAL, "mysql", "bigquery"); final BoolQueryBuilder testQuery = getQuery(filterCriterion); @@ -678,13 +592,9 @@ public void testFilterPlatformByValues() { @Test public void testBrowsePathQueryFilter() { // Condition: has `browsePaths` AND does NOT have `browsePathV2` - Criterion missingBrowsePathV2 = new Criterion(); - missingBrowsePathV2.setCondition(Condition.IS_NULL); - missingBrowsePathV2.setField("browsePathV2"); + Criterion missingBrowsePathV2 = buildIsNullCriterion("browsePathV2"); // Excludes entities without browsePaths - Criterion hasBrowsePathV1 = new Criterion(); - hasBrowsePathV1.setCondition(Condition.EXISTS); - hasBrowsePathV1.setField("browsePaths"); + Criterion hasBrowsePathV1 = buildExistsCriterion("browsePaths"); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingBrowsePathV2); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java index d171ef3cc7b2e..c5f9986284627 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/utils/ESUtilsTest.java @@ -2,15 +2,16 @@ import static com.linkedin.metadata.Constants.DATA_TYPE_URN_PREFIX; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static org.mockito.ArgumentMatchers.anySet; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; -import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.SetMode; -import com.linkedin.data.template.StringArray; import com.linkedin.entity.Aspect; import com.linkedin.metadata.aspect.AspectRetriever; import 
com.linkedin.metadata.query.filter.Condition; @@ -81,11 +82,7 @@ public static void setup() throws RemoteInvocationException, URISyntaxException @Test public void testGetQueryBuilderFromCriterionEqualsValues() { - final Criterion singleValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(ImmutableList.of("value1"))); + final Criterion singleValueCriterion = buildCriterion("myTestField", Condition.EQUAL, "value1"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -107,10 +104,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion("myTestField", Condition.EQUAL, "value1", "value2"); result = ESUtils.getQueryBuilderFromCriterion( @@ -133,10 +127,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { Assert.assertEquals(result.toString(), expected); final Criterion timeseriesField = - new Criterion() - .setField("myTestField") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion("myTestField", Condition.EQUAL, "value1", "value2"); result = ESUtils.getQueryBuilderFromCriterion( @@ -162,7 +153,7 @@ public void testGetQueryBuilderFromCriterionEqualsValues() { @Test public void testGetQueryBuilderFromCriterionContain() { final Criterion singleValueCriterion = - new Criterion().setField("myTestField").setCondition(Condition.CONTAIN).setValue("value1"); + buildCriterion("myTestField", Condition.CONTAIN, "value1"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -174,23 +165,28 @@ public void testGetQueryBuilderFromCriterionContain() { String expected = "{\n" - + " \"wildcard\" : {\n" - + " \"myTestField.keyword\" : {\n" - + " \"wildcard\" : \"*value1*\",\n" - + " \"case_insensitive\" : true,\n" - + " \"boost\" : 1.0,\n" - + " \"_name\" : \"myTestField\"\n" - + " }\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"wildcard\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"wildcard\" : \"*value1*\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.CONTAIN) - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion("myTestField", Condition.CONTAIN, "value1", "value2"); result = ESUtils.getQueryBuilderFromCriterion( @@ -236,10 +232,7 @@ public void testGetQueryBuilderFromCriterionContain() { @Test public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { final Criterion singleValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.START_WITH) - .setValue("value1"); + buildCriterion("myTestField", Condition.START_WITH, "value1"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -251,23 +244,28 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { String expected = "{\n" - + " \"wildcard\" : {\n" - + " \"myTestField.keyword\" : {\n" - + " \"wildcard\" : \"value1*\",\n" - + " \"case_insensitive\" : true,\n" - + " 
\"boost\" : 1.0,\n" - + " \"_name\" : \"myTestField\"\n" - + " }\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"wildcard\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"wildcard\" : \"value1*\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.START_WITH) - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion("myTestField", Condition.START_WITH, "value1", "value2"); result = ESUtils.getQueryBuilderFromCriterion( @@ -313,7 +311,7 @@ public void testWildcardQueryBuilderFromCriterionWhenStartsWith() { @Test public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { final Criterion singleValueCriterion = - new Criterion().setField("myTestField").setCondition(Condition.END_WITH).setValue("value1"); + buildCriterion("myTestField", Condition.END_WITH, "value1"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -325,22 +323,27 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { String expected = "{\n" - + " \"wildcard\" : {\n" - + " \"myTestField.keyword\" : {\n" - + " \"wildcard\" : \"*value1\",\n" - + " \"case_insensitive\" : true,\n" - + " \"boost\" : 1.0,\n" - + " \"_name\" : \"myTestField\"\n" - + " }\n" + + " \"bool\" : {\n" + + " \"should\" : [\n" + + " {\n" + + " \"wildcard\" : {\n" + + " \"myTestField.keyword\" : {\n" + + " \"wildcard\" : \"*value1\",\n" + + " \"case_insensitive\" : true,\n" + + " \"boost\" : 1.0,\n" + + " \"_name\" : \"myTestField\"\n" + + " }\n" + + " }\n" + + " }\n" + + " ],\n" + + " \"adjust_pure_negative\" : true,\n" + + " \"boost\" : 1.0\n" + " }\n" + "}"; Assert.assertEquals(result.toString(), expected); final Criterion multiValueCriterion = - new Criterion() - .setField("myTestField") - .setCondition(Condition.END_WITH) - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion("myTestField", Condition.END_WITH, "value1", "value2"); result = ESUtils.getQueryBuilderFromCriterion( @@ -385,8 +388,7 @@ public void testWildcardQueryBuilderFromCriterionWhenEndsWith() { @Test public void testGetQueryBuilderFromCriterionExists() { - final Criterion singleValueCriterion = - new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + final Criterion singleValueCriterion = buildExistsCriterion("myTestField"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -414,8 +416,7 @@ public void testGetQueryBuilderFromCriterionExists() { Assert.assertEquals(result.toString(), expected); // No diff in the timeseries field case for this condition. 
- final Criterion timeseriesField = - new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + final Criterion timeseriesField = buildExistsCriterion("myTestField"); result = ESUtils.getQueryBuilderFromCriterion( @@ -445,8 +446,7 @@ public void testGetQueryBuilderFromCriterionExists() { @Test public void testGetQueryBuilderFromCriterionIsNull() { - final Criterion singleValueCriterion = - new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); + final Criterion singleValueCriterion = buildIsNullCriterion("myTestField"); QueryBuilder result = ESUtils.getQueryBuilderFromCriterion( @@ -474,8 +474,7 @@ public void testGetQueryBuilderFromCriterionIsNull() { Assert.assertEquals(result.toString(), expected); // No diff in the timeseries case for this condition - final Criterion timeseriesField = - new Criterion().setField("myTestField").setCondition(Condition.IS_NULL); + final Criterion timeseriesField = buildIsNullCriterion("myTestField"); result = ESUtils.getQueryBuilderFromCriterion( @@ -507,11 +506,7 @@ public void testGetQueryBuilderFromCriterionIsNull() { public void testGetQueryBuilderFromCriterionFieldToExpand() { final Criterion singleValueCriterion = - new Criterion() - .setField(FIELD_TO_EXPAND) - .setCondition(Condition.EQUAL) - .setValue("") // Ignored - .setValues(new StringArray(ImmutableList.of("value1"))); + buildCriterion(FIELD_TO_EXPAND, Condition.EQUAL, "value1"); // Ensure that the query is expanded! QueryBuilder result = @@ -551,11 +546,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { Assert.assertEquals(result.toString(), expected); final Criterion timeseriesField = - new Criterion() - .setField(FIELD_TO_EXPAND) - .setCondition(Condition.EQUAL) - .setValue("") // Ignored - .setValues(new StringArray(ImmutableList.of("value1", "value2"))); + buildCriterion(FIELD_TO_EXPAND, Condition.EQUAL, "value1", "value2"); // Ensure that the query is expanded without keyword. 
result = @@ -601,10 +592,7 @@ public void testGetQueryBuilderFromCriterionFieldToExpand() { public void testGetQueryBuilderFromStructPropEqualsValue() { final Criterion singleValueCriterion = - new Criterion() - .setField("structuredProperties.ab.fgh.ten") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(ImmutableList.of("value1"))); + buildCriterion("structuredProperties.ab.fgh.ten", Condition.EQUAL, "value1"); OperationContext opContext = mock(OperationContext.class); when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); @@ -628,10 +616,7 @@ public void testGetQueryBuilderFromStructPropEqualsValue() { public void testGetQueryBuilderFromStructPropEqualsValueV1() { final Criterion singleValueCriterion = - new Criterion() - .setField("structuredProperties.ab.fgh.ten") - .setCondition(Condition.EQUAL) - .setValues(new StringArray(ImmutableList.of("value1"))); + buildCriterion("structuredProperties.ab.fgh.ten", Condition.EQUAL, "value1"); OperationContext opContextV1 = mock(OperationContext.class); when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); @@ -657,8 +642,7 @@ public void testGetQueryBuilderFromStructPropEqualsValueV1() { @Test public void testGetQueryBuilderFromStructPropExists() { - final Criterion singleValueCriterion = - new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + final Criterion singleValueCriterion = buildExistsCriterion("structuredProperties.ab.fgh.ten"); OperationContext opContext = mock(OperationContext.class); when(opContext.getAspectRetriever()).thenReturn(aspectRetriever); @@ -684,8 +668,7 @@ public void testGetQueryBuilderFromStructPropExists() { Assert.assertEquals(result.toString(), expected); // No diff in the timeseries field case for this condition. - final Criterion timeseriesField = - new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + final Criterion timeseriesField = buildExistsCriterion("myTestField"); result = ESUtils.getQueryBuilderFromCriterion( @@ -711,8 +694,7 @@ public void testGetQueryBuilderFromStructPropExists() { @Test public void testGetQueryBuilderFromStructPropExistsV1() { - final Criterion singleValueCriterion = - new Criterion().setField("structuredProperties.ab.fgh.ten").setCondition(Condition.EXISTS); + final Criterion singleValueCriterion = buildExistsCriterion("structuredProperties.ab.fgh.ten"); OperationContext opContextV1 = mock(OperationContext.class); when(opContextV1.getAspectRetriever()).thenReturn(aspectRetrieverV1); @@ -742,8 +724,7 @@ public void testGetQueryBuilderFromStructPropExistsV1() { Assert.assertEquals(result.toString(), expected); // No diff in the timeseries field case for this condition. 
- final Criterion timeseriesField = - new Criterion().setField("myTestField").setCondition(Condition.EXISTS); + final Criterion timeseriesField = buildCriterion("myTestField", Condition.EXISTS); result = ESUtils.getQueryBuilderFromCriterion( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java index ab205d0463c4c..b1b716c560481 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/structuredproperties/hooks/PropertyDefinitionDeleteSideEffectTest.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_DEFINITION_ASPECT_NAME; import static com.linkedin.metadata.Constants.STRUCTURED_PROPERTY_KEY_ASPECT_NAME; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; @@ -23,7 +24,6 @@ import com.linkedin.metadata.aspect.plugins.config.AspectPluginConfig; import com.linkedin.metadata.entity.SearchRetriever; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -178,10 +178,9 @@ private static Filter expectedFilter() { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion propertyExistsCriterion = new Criterion(); - propertyExistsCriterion.setField( - "structuredProperties._versioned.io_acryl_privacy_retentionTime.00000000000001.string"); - propertyExistsCriterion.setCondition(Condition.EXISTS); + final Criterion propertyExistsCriterion = + buildExistsCriterion( + "structuredProperties._versioned.io_acryl_privacy_retentionTime.00000000000001.string"); andCriterion.add(propertyExistsCriterion); conjunction.setAnd(andCriterion); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java index 15597132289b2..faf616b0fb3cf 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeseries/search/TimeseriesAspectServiceTestBase.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH; import static com.linkedin.metadata.Constants.MAX_JACKSON_STRING_SIZE; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static io.datahubproject.test.search.SearchTestUtils.syncAfterWrite; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; @@ -359,8 +360,7 @@ public void testGetAspectTimeseriesValuesAllSorted() { @Test(groups = "getAspectValues", dependsOnGroups = "upsert") public void testGetAspectTimeseriesValuesWithFilter() { Filter filter = new Filter(); - Criterion hasStatEqualsTwenty = - new Criterion().setField("stat").setCondition(Condition.EQUAL).setValue("20"); + 
Criterion hasStatEqualsTwenty = buildCriterion("stat", Condition.EQUAL, "20"); filter.setCriteria(new CriterionArray(hasStatEqualsTwenty)); List resultAspects = elasticSearchTimeseriesAspectService.getAspectValues( @@ -453,18 +453,15 @@ public void testGetAspectTimeseriesValueMissingUrn() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForDay1() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -511,20 +508,15 @@ public void testGetAggregatedStatsLatestStatForDay1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForDay1WithValues() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValues(new StringArray(startTime.toString())) - .setValue(""); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValues(new StringArray(String.valueOf(startTime + 23 * TIME_INCREMENT))) - .setValue(""); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -571,18 +563,15 @@ public void testGetAggregatedStatsLatestStatForDay1WithValues() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -635,18 +624,15 @@ public void testGetAggregatedStatsLatestAComplexNestedRecordForDay1() {
dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStrArrayDay1() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -699,18 +685,15 @@ public void testGetAggregatedStatsLatestStrArrayDay1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForTwoDays() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 47 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 47 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -760,18 +743,15 @@ public void testGetAggregatedStatsLatestStatForTwoDays() { groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() { - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 9 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 9 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -818,23 +798,17 @@ public void testGetAggregatedStatsLatestStatForFirst10HoursOfDay1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForCol1Day1() { Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion 
startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(lastEntryTimeStamp)); - Criterion hasCol1 = - new Criterion() - .setField("componentProfiles.key") - .setCondition(Condition.EQUAL) - .setValue("col1"); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(lastEntryTimeStamp)); + + Criterion hasCol1 = buildCriterion("componentProfiles.key", Condition.EQUAL, "col1"); Filter filter = QueryUtils.getFilterFromCriteria( @@ -895,18 +869,17 @@ public void testGetAggregatedStatsLatestStatForCol1Day1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); + Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(lastEntryTimeStamp)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(lastEntryTimeStamp)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -977,18 +950,15 @@ public void testGetAggregatedStatsLatestStatForAllColumnsDay1() { groups = {"getAggregatedStats"}, dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() { - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 9 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 9 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1033,23 +1003,19 @@ public void testGetAggregatedStatsSumStatForFirst10HoursOfDay1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsSumStatForCol2Day1() { Long lastEntryTimeStamp = startTime + 23 * TIME_INCREMENT; - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); + Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - 
.setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(lastEntryTimeStamp)); - Criterion hasCol2 = - new Criterion() - .setField("componentProfiles.key") - .setCondition(Condition.EQUAL) - .setValue("col2"); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(lastEntryTimeStamp)); + + Criterion hasCol2 = buildCriterion("componentProfiles.key", Condition.EQUAL, "col2"); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1103,18 +1069,15 @@ public void testGetAggregatedStatsSumStatForCol2Day1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsCardinalityAggStrStatDay1() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1159,18 +1122,15 @@ public void testGetAggregatedStatsCardinalityAggStrStatDay1() { dependsOnGroups = {"upsert"}) public void testGetAggregatedStatsSumStatsCollectionDay1() { // Filter is only on the urn - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1214,18 +1174,15 @@ public void testGetAggregatedStatsSumStatsCollectionDay1() { groups = {"deleteAspectValues1"}, dependsOnGroups = {"getAggregatedStats", "getAspectValues", "testCountBeforeDelete"}) public void testDeleteAspectValuesByUrnAndTimeRangeDay1() { - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, 
startTime.toString()); Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter filter = QueryUtils.getFilterFromCriteria( @@ -1243,8 +1200,8 @@ public void testDeleteAspectValuesByUrnAndTimeRangeDay1() { groups = {"deleteAspectValues2"}, dependsOnGroups = {"deleteAspectValues1", "testCountAfterDelete"}) public void testDeleteAspectValuesByUrn() { - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); + Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); DeleteAspectValuesResult result = elasticSearchTimeseriesAspectService.deleteAspectValues( @@ -1259,8 +1216,8 @@ public void testDeleteAspectValuesByUrn() { dependsOnGroups = {"upsert"}) public void testCountByFilter() { // Test with filter - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); + Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); long count = elasticSearchTimeseriesAspectService.countByFilter( @@ -1269,15 +1226,14 @@ public void testCountByFilter() { // Test with filter with multiple criteria Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter urnAndTimeFilter = QueryUtils.getFilterFromCriteria( @@ -1301,8 +1257,8 @@ public void testCountByFilter() { public void testCountByFilterAfterDelete() throws Exception { syncAfterWrite(getBulkProcessor()); // Test with filter - Criterion hasUrnCriterion = - new Criterion().setField("urn").setCondition(Condition.EQUAL).setValue(TEST_URN.toString()); + Criterion hasUrnCriterion = buildCriterion("urn", Condition.EQUAL, TEST_URN.toString()); + Filter filter = QueryUtils.getFilterFromCriteria(ImmutableList.of(hasUrnCriterion)); long count = elasticSearchTimeseriesAspectService.countByFilter( @@ -1311,15 +1267,14 @@ public void testCountByFilterAfterDelete() throws Exception { // Test with filter with multiple criteria Criterion startTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.GREATER_THAN_OR_EQUAL_TO) - .setValue(startTime.toString()); + buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTime.toString()); + Criterion endTimeCriterion = - new Criterion() - .setField(ES_FIELD_TIMESTAMP) - .setCondition(Condition.LESS_THAN_OR_EQUAL_TO) - .setValue(String.valueOf(startTime + 23 * TIME_INCREMENT)); + buildCriterion( + ES_FIELD_TIMESTAMP, + Condition.LESS_THAN_OR_EQUAL_TO, + String.valueOf(startTime + 23 * TIME_INCREMENT)); Filter 
urnAndTimeFilter = QueryUtils.getFilterFromCriteria( diff --git a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java index bbe0feed7de11..ed585d8fb0cb2 100644 --- a/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java +++ b/metadata-jobs/mae-consumer/src/main/java/com/linkedin/metadata/kafka/hook/siblings/SiblingAssociationHook.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.kafka.hook.siblings; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -425,10 +426,9 @@ private Filter createFilterForEntitiesWithYouAsSibling(final Urn entityUrn) { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion urnCriterion = new Criterion(); - urnCriterion.setField("siblings.keyword"); - urnCriterion.setValue(entityUrn.toString()); - urnCriterion.setCondition(Condition.EQUAL); + final Criterion urnCriterion = + buildCriterion("siblings.keyword", Condition.EQUAL, entityUrn.toString()); + andCriterion.add(urnCriterion); conjunction.setAnd(andCriterion); diff --git a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/invite/InviteTokenService.java b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/invite/InviteTokenService.java index c8ffd36f5ebc9..8bc18eae92a23 100644 --- a/metadata-service/auth-impl/src/main/java/com/datahub/authentication/invite/InviteTokenService.java +++ b/metadata-service/auth-impl/src/main/java/com/datahub/authentication/invite/InviteTokenService.java @@ -2,6 +2,7 @@ import static com.linkedin.metadata.Constants.*; import static com.linkedin.metadata.entity.AspectUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.linkedin.common.urn.Urn; import com.linkedin.entity.EntityResponse; @@ -117,10 +118,7 @@ private Filter createInviteTokenFilter() { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion roleCriterion = new Criterion(); - roleCriterion.setField(HAS_ROLE_FIELD_NAME); - roleCriterion.setValue("false"); - roleCriterion.setCondition(Condition.EQUAL); + final Criterion roleCriterion = buildCriterion(HAS_ROLE_FIELD_NAME, Condition.EQUAL, "false"); andCriterion.add(roleCriterion); conjunction.setAnd(andCriterion); @@ -136,10 +134,7 @@ private Filter createInviteTokenFilter(@Nonnull final String roleUrnStr) { final ConjunctiveCriterion conjunction = new ConjunctiveCriterion(); final CriterionArray andCriterion = new CriterionArray(); - final Criterion roleCriterion = new Criterion(); - roleCriterion.setField(ROLE_FIELD_NAME); - roleCriterion.setValue(roleUrnStr); - roleCriterion.setCondition(Condition.EQUAL); + final Criterion roleCriterion = buildCriterion(ROLE_FIELD_NAME, Condition.EQUAL, roleUrnStr); andCriterion.add(roleCriterion); conjunction.setAnd(andCriterion); diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java index 668892bb46b7f..2c00c73c96549 
100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.boot.steps; -import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildExistsCriterion; +import static com.linkedin.metadata.utils.CriterionUtils.buildIsNullCriterion; import static com.linkedin.metadata.utils.SystemMetadataUtils.createDefaultSystemMetadata; import com.google.common.collect.ImmutableList; @@ -13,7 +14,6 @@ import com.linkedin.metadata.aspect.utils.DefaultAspectsUtil; import com.linkedin.metadata.boot.UpgradeStep; import com.linkedin.metadata.entity.EntityService; -import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -89,13 +89,10 @@ private String backfillBrowsePathsV2( throws Exception { // Condition: has `browsePaths` AND does NOT have `browsePathV2` - Criterion missingBrowsePathV2 = new Criterion(); - missingBrowsePathV2.setCondition(Condition.IS_NULL); - missingBrowsePathV2.setField("browsePathV2"); + Criterion missingBrowsePathV2 = buildIsNullCriterion("browsePathV2"); + // Excludes entities without browsePaths - Criterion hasBrowsePathV1 = new Criterion(); - hasBrowsePathV1.setCondition(Condition.EXISTS); - hasBrowsePathV1.setField("browsePaths"); + Criterion hasBrowsePathV1 = buildExistsCriterion("browsePaths"); CriterionArray criterionArray = new CriterionArray(); criterionArray.add(missingBrowsePathV2); diff --git a/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/CacheTest.java b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/CacheTest.java index 153348e7ec119..dc0f436888ef1 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/CacheTest.java +++ b/metadata-service/factories/src/test/java/com/linkedin/gms/factory/search/CacheTest.java @@ -2,6 +2,7 @@ import static com.datahub.util.RecordUtils.*; import static com.linkedin.metadata.search.client.CachingEntitySearchService.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.Mockito.mock; import com.google.common.collect.ImmutableList; @@ -10,7 +11,6 @@ import com.hazelcast.jet.core.JetTestSupport; import com.hazelcast.spring.cache.HazelcastCacheManager; import com.linkedin.common.urn.CorpuserUrn; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.LineageRelationship; @@ -123,19 +123,9 @@ public void hazelcastTestScroll() { .setPageSize(1) .setMetadata(new SearchResultMetadata()); - final Criterion filterCriterion = - new Criterion() - .setField("platform") - .setCondition(Condition.EQUAL) - .setValue("hive") - .setValues(new StringArray(ImmutableList.of("hive"))); - - final Criterion subtypeCriterion = - new Criterion() - .setField("subtypes") - .setCondition(Condition.EQUAL) - .setValue("") - .setValues(new StringArray(ImmutableList.of("view"))); + final Criterion filterCriterion = buildCriterion("platform", Condition.EQUAL, "hive"); + + final Criterion subtypeCriterion = buildCriterion("subtypes", Condition.EQUAL, "view"); final Filter 
filterWithCondition = new Filter() diff --git a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java index bc5b9e439d293..824460b8a1a50 100644 --- a/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java +++ b/metadata-service/restli-client/src/main/java/com/linkedin/entity/client/RestliEntityClient.java @@ -1,5 +1,7 @@ package com.linkedin.entity.client; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.datahub.plugins.auth.authorization.Authorizer; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; @@ -59,7 +61,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; @@ -1181,7 +1182,7 @@ public static Filter newFilter(@Nullable Map params) { CriterionArray criteria = params.entrySet().stream() .filter(e -> Objects.nonNull(e.getValue())) - .map(e -> newCriterion(e.getKey(), e.getValue(), Condition.EQUAL)) + .map(e -> buildCriterion(e.getKey(), Condition.EQUAL, e.getValue())) .collect(Collectors.toCollection(CriterionArray::new)); return new Filter() .setOr( @@ -1189,12 +1190,6 @@ public static Filter newFilter(@Nullable Map params) { ImmutableList.of(new ConjunctiveCriterion().setAnd(criteria)))); } - @Nonnull - public static Criterion newCriterion( - @Nonnull String field, @Nonnull String value, @Nonnull Condition condition) { - return new Criterion().setField(field).setValue(value).setCondition(condition); - } - @Nonnull public static Filter filterOrDefaultEmptyFilter(@Nullable Filter filter) { return filter != null ? filter : new Filter().setOr(new ConjunctiveCriterionArray()); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 1690185324560..74c15d1f35889 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -22,6 +22,7 @@ import com.datahub.authorization.EntitySpec; import com.linkedin.metadata.resources.restli.RestliUtils; +import com.linkedin.metadata.utils.CriterionUtils; import com.linkedin.metadata.utils.SystemMetadataUtils; import io.datahubproject.metadata.context.RequestContext; import io.datahubproject.metadata.services.RestrictedService; @@ -53,7 +54,6 @@ import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.SortCriterion; -import com.linkedin.metadata.resources.restli.RestliUtils; import com.linkedin.metadata.run.AspectRowSummary; import com.linkedin.metadata.run.AspectRowSummaryArray; import com.linkedin.metadata.run.DeleteEntityResponse; @@ -984,16 +984,16 @@ private Long deleteTimeseriesAspects( // Construct the filter. 
List criteria = new ArrayList<>(); - criteria.add(QueryUtils.newCriterion("urn", urn.toString())); + criteria.add(CriterionUtils.buildCriterion("urn", Condition.EQUAL, urn.toString())); if (startTimeMillis != null) { criteria.add( - QueryUtils.newCriterion( - ES_FIELD_TIMESTAMP, startTimeMillis.toString(), Condition.GREATER_THAN_OR_EQUAL_TO)); + CriterionUtils.buildCriterion( + ES_FIELD_TIMESTAMP, Condition.GREATER_THAN_OR_EQUAL_TO, startTimeMillis.toString())); } if (endTimeMillis != null) { criteria.add( - QueryUtils.newCriterion( - ES_FIELD_TIMESTAMP, endTimeMillis.toString(), Condition.LESS_THAN_OR_EQUAL_TO)); + CriterionUtils.buildCriterion( + ES_FIELD_TIMESTAMP, Condition.LESS_THAN_OR_EQUAL_TO, endTimeMillis.toString())); } final Filter filter = QueryUtils.getFilterFromCriteria(criteria); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java index ea329ce0809fb..c1c41f0996f9f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/operations/OperationsResource.java @@ -2,16 +2,14 @@ import static com.datahub.authorization.AuthUtil.isAPIAuthorized; import static com.linkedin.metadata.resources.restli.RestliConstants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.codahale.metrics.MetricRegistry; import com.datahub.authentication.Authentication; import com.datahub.authentication.AuthenticationContext; -import com.datahub.authorization.EntitySpec; import com.datahub.plugins.auth.authorization.Authorizer; import com.google.common.annotations.VisibleForTesting; -import com.linkedin.common.urn.Urn; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.authorization.Disjunctive; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.query.filter.Condition; @@ -37,7 +35,6 @@ import io.opentelemetry.extension.annotations.WithSpan; import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.inject.Inject; @@ -246,8 +243,8 @@ String executeTruncateTimeseriesAspect( List criteria = new ArrayList<>(); criteria.add( - QueryUtils.newCriterion( - "timestampMillis", String.valueOf(endTimeMillis), Condition.LESS_THAN_OR_EQUAL_TO)); + buildCriterion( + "timestampMillis", Condition.LESS_THAN_OR_EQUAL_TO, String.valueOf(endTimeMillis))); final Filter filter = QueryUtils.getFilterFromCriteria(criteria); long numToDelete = _timeseriesAspectService.countByFilter(opContext, entityType, aspectName, filter); @@ -289,8 +286,8 @@ String executeTruncateTimeseriesAspect( // count List reindexCriteria = new ArrayList<>(); reindexCriteria.add( - QueryUtils.newCriterion( - "timestampMillis", String.valueOf(endTimeMillis), Condition.GREATER_THAN)); + buildCriterion( + "timestampMillis", Condition.GREATER_THAN, String.valueOf(endTimeMillis))); final Filter reindexFilter = QueryUtils.getFilterFromCriteria(reindexCriteria); String taskId = diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index 145e71b4371e6..a3c57a19eddd5 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.entity; import static com.linkedin.metadata.search.utils.QueryUtils.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; @@ -30,7 +31,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.query.filter.RelationshipDirection; @@ -571,25 +571,15 @@ private AssetScrollResult getAssetsReferencingUrn( // first, get all entities with this form assigned on it final CriterionArray incompleteFormsArray = new CriterionArray(); incompleteFormsArray.add( - new Criterion() - .setField("incompleteForms") - .setValue(deletedUrn.toString()) - .setCondition(Condition.EQUAL)); + buildCriterion("incompleteForms", Condition.EQUAL, deletedUrn.toString())); final CriterionArray completedFormsArray = new CriterionArray(); completedFormsArray.add( - new Criterion() - .setField("completedForms") - .setValue(deletedUrn.toString()) - .setCondition(Condition.EQUAL)); + buildCriterion("completedForms", Condition.EQUAL, deletedUrn.toString())); // next, get all metadata tests created for this form final CriterionArray metadataTestSourceArray = new CriterionArray(); metadataTestSourceArray.add( - new Criterion() - .setField("sourceEntity") - .setValue(deletedUrn.toString()) - .setCondition(Condition.EQUAL)); - metadataTestSourceArray.add( - new Criterion().setField("sourceType").setValue("FORMS").setCondition(Condition.EQUAL)); + buildCriterion("sourceEntity", Condition.EQUAL, deletedUrn.toString())); + metadataTestSourceArray.add(buildCriterion("sourceType", Condition.EQUAL, "FORMS")); Filter filter = new Filter() .setOr( diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java index da4dd9d76d451..c1593088a2dd7 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java @@ -1,10 +1,12 @@ package com.linkedin.metadata.recommendation.candidatesource; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; -import com.linkedin.metadata.query.filter.Criterion; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.recommendation.ContentParams; @@ -167,9 
+169,8 @@ private RecommendationContent buildRecommendationContent(T candidate, long c .setFilters( new CriterionArray( ImmutableList.of( - new Criterion() - .setField(getSearchFieldName()) - .setValue(candidate.toString())))); + buildCriterion( + getSearchFieldName(), Condition.EQUAL, candidate.toString())))); ContentParams contentParams = new ContentParams().setCount(count); RecommendationContent content = new RecommendationContent(); if (candidate instanceof Urn) { diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index f6a37f958c30d..7e9d1701bf79a 100644 --- a/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -1,12 +1,12 @@ package com.linkedin.metadata.search.utils; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import com.datahub.util.ModelUtils; import com.google.common.collect.ImmutableList; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.AspectVersion; import com.linkedin.metadata.config.DataHubAppConfiguration; import com.linkedin.metadata.models.EntitySpec; @@ -32,7 +32,6 @@ import java.util.stream.Stream; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import javax.validation.constraints.Null; import org.apache.commons.collections.CollectionUtils; public class QueryUtils { @@ -41,43 +40,6 @@ public class QueryUtils { private QueryUtils() {} - // Creates new Criterion with field and value, using EQUAL condition. - @Nonnull - public static Criterion newCriterion(@Nonnull String field, @Nonnull String value) { - return newCriterion(field, value, Condition.EQUAL); - } - - // Creates new Criterion with field, value and condition. - @Nonnull - public static Criterion newCriterion( - @Nonnull String field, @Nonnull String value, @Nonnull Condition condition) { - return new Criterion() - .setField(field) - .setValue(value) - .setValues(new StringArray(ImmutableList.of(value))) - .setCondition(condition); - } - - // Creates new Criterion with field and value, using EQUAL condition. - @Nullable - public static Criterion newCriterion(@Nonnull String field, @Nonnull List values) { - return newCriterion(field, values, Condition.EQUAL); - } - - // Creates new Criterion with field, value and condition. - @Null - public static Criterion newCriterion( - @Nonnull String field, @Nonnull List values, @Nonnull Condition condition) { - if (values.isEmpty()) { - return null; - } - return new Criterion() - .setField(field) - .setValue(values.get(0)) // Hack! This is due to bad modeling. - .setValues(new StringArray(values)) - .setCondition(condition); - } - // Creates new Filter from a map of Criteria by removing null-valued Criteria and using EQUAL // condition (default). 
@Nonnull @@ -88,7 +50,7 @@ public static Filter newFilter(@Nullable Map params) { CriterionArray criteria = params.entrySet().stream() .filter(e -> Objects.nonNull(e.getValue())) - .map(e -> newCriterion(e.getKey(), e.getValue())) + .map(e -> buildCriterion(e.getKey(), Condition.EQUAL, e.getValue())) .collect(Collectors.toCollection(CriterionArray::new)); return new Filter() .setOr( @@ -259,32 +221,27 @@ public static Filter buildFilterWithUrns( // Prevent increasing the query size by avoiding querying multiple fields with the // same URNs Criterion urnMatchCriterion = - new Criterion() - .setField("urn") - .setValue("") - .setValues( - new StringArray( - urns.stream() - .filter( - urn -> - !schemaFieldEnabled - || !urn.getEntityType().equals(SCHEMA_FIELD_ENTITY_NAME)) - .map(Object::toString) - .collect(Collectors.toList()))); + buildCriterion( + "urn", + Condition.EQUAL, + urns.stream() + .filter( + urn -> + !schemaFieldEnabled + || !urn.getEntityType().equals(SCHEMA_FIELD_ENTITY_NAME)) + .map(Object::toString) + .collect(Collectors.toList())); Criterion schemaUrnAliasCriterion = - new Criterion() - .setField(String.format("%s.keyword", SCHEMA_FIELD_ALIASES_ASPECT)) - .setValue("") - .setValues( - new StringArray( - urns.stream() - .filter( - urn -> - schemaFieldEnabled - && urn.getEntityType().equals(SCHEMA_FIELD_ENTITY_NAME)) - .map(Object::toString) - .collect(Collectors.toList()))); + buildCriterion( + String.format("%s.keyword", SCHEMA_FIELD_ALIASES_ASPECT), + Condition.EQUAL, + urns.stream() + .filter( + urn -> + schemaFieldEnabled && urn.getEntityType().equals(SCHEMA_FIELD_ENTITY_NAME)) + .map(Object::toString) + .collect(Collectors.toList())); if (inputFilters == null || CollectionUtils.isEmpty(inputFilters.getOr())) { return QueryUtils.newDisjunctiveFilter(urnMatchCriterion, schemaUrnAliasCriterion); diff --git a/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java index d4b91b717d2f8..ea8d3792a1396 100644 --- a/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.service; import static com.linkedin.metadata.Constants.*; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; @@ -21,7 +22,6 @@ import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; -import com.linkedin.metadata.query.filter.Criterion; import com.linkedin.metadata.query.filter.CriterionArray; import com.linkedin.metadata.query.filter.Filter; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -69,10 +69,8 @@ private void testCreateViewSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))), + buildCriterion( + "field", Condition.EQUAL, "value")))))))), 0L); Assert.assertEquals(urn, TEST_VIEW_URN); @@ -99,10 +97,8 @@ private void testCreateViewSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))), + buildCriterion( + "field", 
Condition.EQUAL, "value")))))))), 0L); Assert.assertEquals(urn, TEST_VIEW_URN); @@ -138,10 +134,10 @@ private void testCreateViewErrorMissingInputs() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))), + buildCriterion( + "field", + Condition.EQUAL, + "value")))))))), 0L)); // Case 2: missing View name @@ -166,10 +162,10 @@ private void testCreateViewErrorMissingInputs() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))), + buildCriterion( + "field", + Condition.EQUAL, + "value")))))))), 0L)); // Case 3: missing View definition @@ -218,10 +214,10 @@ private void testCreateViewError() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))), + buildCriterion( + "field", + Condition.EQUAL, + "value")))))))), 1L)); } @@ -264,10 +260,8 @@ private void testUpdateViewSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))); + buildCriterion( + "field", Condition.EQUAL, "value")))))))); // Case 1: Update name only service.updateView(opContext, TEST_VIEW_URN, newName, null, null, 1L); @@ -441,10 +435,8 @@ private void testGetViewInfoSuccess() throws Exception { .setAnd( new CriterionArray( ImmutableList.of( - new Criterion() - .setField("field") - .setCondition(Condition.EQUAL) - .setValue("value")))))))); + buildCriterion( + "field", Condition.EQUAL, "value")))))))); resetGetViewInfoMockEntityClient( mockClient, TEST_VIEW_URN, type, name, description, definition, TEST_USER_URN, 0L, 1L); diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java new file mode 100644 index 0000000000000..e40c4af1e0ae7 --- /dev/null +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/CriterionUtils.java @@ -0,0 +1,71 @@ +package com.linkedin.metadata.utils; + +import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.query.filter.Condition; +import com.linkedin.metadata.query.filter.Criterion; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; + +public class CriterionUtils { + private CriterionUtils() {} + + public static Criterion buildExistsCriterion(@Nonnull String field) { + return buildCriterion(field, Condition.EXISTS, false, Collections.emptyList()); + } + + public static Criterion buildNotExistsCriterion(@Nonnull String field) { + return buildCriterion(field, Condition.EXISTS, true, Collections.emptyList()); + } + + public static Criterion buildIsNullCriterion(@Nonnull String field) { + return buildCriterion(field, Condition.IS_NULL, false, Collections.emptyList()); + } + + public static Criterion buildIsNotNullCriterion(@Nonnull String field) { + return buildCriterion(field, Condition.IS_NULL, true, Collections.emptyList()); + } + + public static Criterion buildCriterion( + @Nonnull String field, @Nonnull Condition condition, String... values) { + return buildCriterion( + field, + condition, + null, + values == null + ? 
Collections.emptyList() + : Arrays.stream(values).collect(Collectors.toList())); + } + + public static Criterion buildCriterion( + @Nonnull String field, @Nonnull Condition condition, Collection values) { + return buildCriterion(field, condition, false, values); + } + + public static Criterion buildCriterion( + @Nonnull String field, @Nonnull Condition condition, boolean negated, String... values) { + return buildCriterion( + field, + condition, + negated, + values == null + ? Collections.emptyList() + : Arrays.stream(values).collect(Collectors.toList())); + } + + public static Criterion buildCriterion( + @Nonnull String field, + @Nonnull Condition condition, + Boolean negated, + Collection values) { + Criterion criterion = new Criterion(); + criterion.setField(field); + criterion.setCondition(condition); + criterion.setNegated(negated != null ? negated : false); + criterion.setValues(values != null ? new StringArray(values) : new StringArray()); + criterion.setValue(""); // deprecated + return criterion; + } +} diff --git a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java index aa18124c826da..b115d11a30739 100644 --- a/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java +++ b/metadata-utils/src/main/java/com/linkedin/metadata/utils/SearchUtil.java @@ -1,7 +1,9 @@ package com.linkedin.metadata.utils; +import static com.linkedin.metadata.utils.CriterionUtils.buildCriterion; + import com.linkedin.common.urn.Urn; -import com.linkedin.data.template.StringArray; +import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; import com.linkedin.metadata.query.filter.Criterion; @@ -70,16 +72,14 @@ public static FilterValue createFilterValue(String value, Long facetCount, Boole private static Criterion transformEntityTypeCriterion( Criterion criterion, IndexConvention indexConvention) { - return criterion - .setField(ES_INDEX_FIELD) - .setValues( - new StringArray( - criterion.getValues().stream() - .map(value -> String.join("", value.split("_"))) - .map(indexConvention::getEntityIndexName) - .collect(Collectors.toList()))) - .setValue( - indexConvention.getEntityIndexName(String.join("", criterion.getValue().split("_")))); + return buildCriterion( + ES_INDEX_FIELD, + Condition.EQUAL, + criterion.isNegated(), + criterion.getValues().stream() + .map(value -> String.join("", value.split("_"))) + .map(indexConvention::getEntityIndexName) + .collect(Collectors.toList())); } private static ConjunctiveCriterion transformConjunctiveCriterion( From edd2831caa612e4288f1938777d00830f59064cb Mon Sep 17 00:00:00 2001 From: Meenakshi Kamalaseshan Radha <62914384+mkamalas@users.noreply.github.com> Date: Sat, 28 Sep 2024 00:41:14 +0530 Subject: [PATCH 7/8] feat(auth) - Manage Children Glossary term authorization check for Owner, Domain, Remove link (#11337) --- .../datahub/graphql/GmsGraphQLEngine.java | 17 +++++++----- .../resolvers/domain/SetDomainResolver.java | 2 +- .../resolvers/domain/UnsetDomainResolver.java | 2 +- .../glossary/AddRelatedTermsResolver.java | 3 +-- .../DeleteGlossaryEntityResolver.java | 3 +-- .../glossary/RemoveRelatedTermsResolver.java | 3 +-- .../resolvers/mutate/AddLinkResolver.java | 17 +----------- .../resolvers/mutate/AddOwnerResolver.java | 4 ++- .../resolvers/mutate/AddOwnersResolver.java | 5 +++- .../mutate/BatchAddOwnersResolver.java | 4 
++- .../mutate/BatchRemoveOwnersResolver.java | 4 ++- .../mutate/BatchSetDomainResolver.java | 4 ++- .../resolvers/mutate/RemoveLinkResolver.java | 6 ++++- .../resolvers/mutate/RemoveOwnerResolver.java | 4 ++- .../resolvers/mutate/UpdateNameResolver.java | 6 ++--- .../resolvers/mutate/util/DomainUtils.java | 7 ++++- .../resolvers/mutate/util/GlossaryUtils.java | 15 +++++++++++ .../resolvers/mutate/util/OwnerUtils.java | 8 +++++- .../domain/BatchSetDomainResolverTest.java | 22 +++++++++++----- .../owner/AddOwnersResolverTest.java | 26 +++++++++++++------ .../owner/BatchAddOwnersResolverTest.java | 19 +++++++++----- .../owner/BatchRemoveOwnersResolverTest.java | 16 ++++++++---- 22 files changed, 127 insertions(+), 70 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index d38c1030b61be..5b265b6714452 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -1192,13 +1192,15 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher( "updateDescription", new UpdateDescriptionResolver(entityService, this.entityClient)) - .dataFetcher("addOwner", new AddOwnerResolver(entityService)) - .dataFetcher("addOwners", new AddOwnersResolver(entityService)) - .dataFetcher("batchAddOwners", new BatchAddOwnersResolver(entityService)) - .dataFetcher("removeOwner", new RemoveOwnerResolver(entityService)) - .dataFetcher("batchRemoveOwners", new BatchRemoveOwnersResolver(entityService)) + .dataFetcher("addOwner", new AddOwnerResolver(entityService, entityClient)) + .dataFetcher("addOwners", new AddOwnersResolver(entityService, entityClient)) + .dataFetcher( + "batchAddOwners", new BatchAddOwnersResolver(entityService, entityClient)) + .dataFetcher("removeOwner", new RemoveOwnerResolver(entityService, entityClient)) + .dataFetcher( + "batchRemoveOwners", new BatchRemoveOwnersResolver(entityService, entityClient)) .dataFetcher("addLink", new AddLinkResolver(entityService, this.entityClient)) - .dataFetcher("removeLink", new RemoveLinkResolver(entityService)) + .dataFetcher("removeLink", new RemoveLinkResolver(entityService, entityClient)) .dataFetcher("addGroupMembers", new AddGroupMembersResolver(this.groupService)) .dataFetcher("removeGroupMembers", new RemoveGroupMembersResolver(this.groupService)) .dataFetcher("createGroup", new CreateGroupResolver(this.groupService)) @@ -1212,7 +1214,8 @@ private void configureMutationResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("deleteDomain", new DeleteDomainResolver(entityClient)) .dataFetcher( "setDomain", new SetDomainResolver(this.entityClient, this.entityService)) - .dataFetcher("batchSetDomain", new BatchSetDomainResolver(this.entityService)) + .dataFetcher( + "batchSetDomain", new BatchSetDomainResolver(this.entityService, entityClient)) .dataFetcher( "updateDeprecation", new UpdateDeprecationResolver(this.entityClient, this.entityService)) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java index 6ada447ca59ee..adbaae368a418 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/SetDomainResolver.java @@ -44,7 +44,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( - environment.getContext(), entityUrn)) { + environment.getContext(), entityUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java index 783cf250a7ca6..b0bb206a8827b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/domain/UnsetDomainResolver.java @@ -43,7 +43,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity( - environment.getContext(), entityUrn)) { + environment.getContext(), entityUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java index 1e99ea120354e..69ad8658b23ba 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/AddRelatedTermsResolver.java @@ -46,8 +46,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { - final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(urn, context, _entityClient)) { try { final TermRelationshipType relationshipType = input.getRelationshipType(); final List termUrns = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java index 26f0c61de1b0f..c663bd2cf9b9f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/DeleteGlossaryEntityResolver.java @@ -29,11 +29,10 @@ public CompletableFuture get(final DataFetchingEnvironment environment) throws Exception { final QueryContext context = environment.getContext(); final Urn entityUrn = Urn.createFromString(environment.getArgument("urn")); - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(entityUrn, context, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { - if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(entityUrn, context, _entityClient)) { if 
(!_entityService.exists(context.getOperationContext(), entityUrn, true)) { throw new RuntimeException(String.format("This urn does not exist: %s", entityUrn)); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java index 59f820d7cbd36..4e1ffcc00cd89 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/glossary/RemoveRelatedTermsResolver.java @@ -42,8 +42,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw return GraphQLConcurrencyUtils.supplyAsync( () -> { - final Urn parentUrn = GlossaryUtils.getParentUrn(urn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(urn, context, _entityClient)) { try { final TermRelationshipType relationshipType = input.getRelationshipType(); final List termUrnsToRemove = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java index 5cffcd9c35c00..c71832b956a7c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddLinkResolver.java @@ -11,7 +11,6 @@ import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -36,7 +35,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw Urn targetUrn = Urn.createFromString(input.getResourceUrn()); if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn) - && !canUpdateGlossaryEntityLinks(targetUrn, context)) { + && !GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } @@ -70,18 +69,4 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw this.getClass().getSimpleName(), "get"); } - - // Returns whether this is a glossary entity and whether you can edit this glossary entity with - // the - // Manage all children or Manage direct children privileges - private boolean canUpdateGlossaryEntityLinks(Urn targetUrn, QueryContext context) { - final boolean isGlossaryEntity = - targetUrn.getEntityType().equals(Constants.GLOSSARY_TERM_ENTITY_NAME) - || targetUrn.getEntityType().equals(Constants.GLOSSARY_NODE_ENTITY_NAME); - if (!isGlossaryEntity) { - return false; - } - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - return GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient); - } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java index 7c0f7b3757ee9..db58236a4e615 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnerResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -23,6 +24,7 @@ public class AddOwnerResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -41,7 +43,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw } OwnerInput ownerInput = ownerInputBuilder.build(); - OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java index ade4e7b744801..329b0abf31149 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/AddOwnersResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -24,6 +25,7 @@ public class AddOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -35,7 +37,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw 
return GraphQLConcurrencyUtils.supplyAsync( () -> { - OwnerUtils.validateAuthorizedToUpdateOwners(environment.getContext(), targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners( + environment.getContext(), targetUrn, _entityClient); OwnerUtils.validateAddOwnerInput( context.getOperationContext(), owners, targetUrn, _entityService); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java index 28daef1b11062..3f0f5e0b9edcf 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchAddOwnersResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -27,6 +28,7 @@ public class BatchAddOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -80,7 +82,7 @@ private void validateInputResource( "Malformed input provided: owners cannot be applied to subresources."); } - OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn, _entityClient); LabelUtils.validateResource( opContext, resourceUrn, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java index 5aaace4e21e9c..4772b3ef27ac9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchRemoveOwnersResolver.java @@ -10,6 +10,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -24,6 +25,7 @@ public class BatchRemoveOwnersResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -72,7 +74,7 @@ private void validateInputResource(ResourceRefInput resource, QueryContext conte "Malformed input provided: owners cannot be removed from subresources."); } - OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, resourceUrn, _entityClient); LabelUtils.validateResource( context.getOperationContext(), resourceUrn, diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java index abbeed29545e4..3cf8e801d4171 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/BatchSetDomainResolver.java @@ -11,6 +11,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.DomainUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LabelUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -28,6 +29,7 @@ public class BatchSetDomainResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -74,7 +76,7 @@ private void validateInputResources(List resources, QueryConte private void validateInputResource(ResourceRefInput resource, QueryContext context) { final Urn resourceUrn = UrnUtils.getUrn(resource.getResourceUrn()); - if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity(context, resourceUrn)) { + if (!DomainUtils.isAuthorizedToUpdateDomainsForEntity(context, resourceUrn, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java index e047a24a0adaa..584a0e3e9c2aa 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveLinkResolver.java @@ -8,7 +8,9 @@ import com.linkedin.datahub.graphql.concurrency.GraphQLConcurrencyUtils; import com.linkedin.datahub.graphql.exception.AuthorizationException; import com.linkedin.datahub.graphql.generated.RemoveLinkInput; +import com.linkedin.datahub.graphql.resolvers.mutate.util.GlossaryUtils; import com.linkedin.datahub.graphql.resolvers.mutate.util.LinkUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -21,6 +23,7 @@ public class RemoveLinkResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -31,7 +34,8 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw String linkUrl = input.getLinkUrl(); Urn targetUrn = Urn.createFromString(input.getResourceUrn()); - if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn)) { + if (!LinkUtils.isAuthorizedToUpdateLinks(context, targetUrn) + && !GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { throw new AuthorizationException( "Unauthorized to perform this action. 
Please contact your DataHub administrator."); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java index 8d14884885572..dfb4778644321 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/RemoveOwnerResolver.java @@ -10,6 +10,7 @@ import com.linkedin.datahub.graphql.generated.RemoveOwnerInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.entity.EntityService; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; @@ -22,6 +23,7 @@ public class RemoveOwnerResolver implements DataFetcher> { private final EntityService _entityService; + private final EntityClient _entityClient; @Override public CompletableFuture get(DataFetchingEnvironment environment) throws Exception { @@ -36,7 +38,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw ? null : Urn.createFromString(input.getOwnershipTypeUrn()); - OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn); + OwnerUtils.validateAuthorizedToUpdateOwners(context, targetUrn, _entityClient); return GraphQLConcurrencyUtils.supplyAsync( () -> { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java index ad6dbbe635ed1..87aad3f767d95 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/UpdateNameResolver.java @@ -87,8 +87,7 @@ public CompletableFuture get(DataFetchingEnvironment environment) throw private Boolean updateGlossaryTermName( Urn targetUrn, UpdateNameInput input, QueryContext context) { - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { try { GlossaryTermInfo glossaryTermInfo = (GlossaryTermInfo) @@ -123,8 +122,7 @@ private Boolean updateGlossaryTermName( private Boolean updateGlossaryNodeName( Urn targetUrn, UpdateNameInput input, QueryContext context) { - final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, _entityClient); - if (GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, _entityClient)) { + if (GlossaryUtils.canUpdateGlossaryEntity(targetUrn, context, _entityClient)) { try { GlossaryNodeInfo glossaryNodeInfo = (GlossaryNodeInfo) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java index 4224f75773200..bf94585467814 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/DomainUtils.java @@ -60,7 +60,12 @@ public class 
DomainUtils { private DomainUtils() {} public static boolean isAuthorizedToUpdateDomainsForEntity( - @Nonnull QueryContext context, Urn entityUrn) { + @Nonnull QueryContext context, Urn entityUrn, EntityClient entityClient) { + + if (GlossaryUtils.canUpdateGlossaryEntity(entityUrn, context, entityClient)) { + return true; + } + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( ImmutableList.of( diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java index 0d8e505a948e5..9ff908b4ee37f 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/GlossaryUtils.java @@ -36,6 +36,21 @@ public static boolean canManageGlossaries(@Nonnull QueryContext context) { context.getOperationContext(), PoliciesConfig.MANAGE_GLOSSARIES_PRIVILEGE); } + // Returns whether this is a glossary entity and whether you can edit this glossary entity with + // the + // Manage all children or Manage direct children privileges + public static boolean canUpdateGlossaryEntity( + Urn targetUrn, QueryContext context, EntityClient entityClient) { + final boolean isGlossaryEntity = + targetUrn.getEntityType().equals(Constants.GLOSSARY_TERM_ENTITY_NAME) + || targetUrn.getEntityType().equals(Constants.GLOSSARY_NODE_ENTITY_NAME); + if (!isGlossaryEntity) { + return false; + } + final Urn parentNodeUrn = GlossaryUtils.getParentUrn(targetUrn, context, entityClient); + return GlossaryUtils.canManageChildrenEntities(context, parentNodeUrn, entityClient); + } + /** * Returns true if the current user is able to create, delete, or move Glossary Terms and Nodes * under a parent Node. 
They can do this with either the global MANAGE_GLOSSARIES privilege, or if diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index 2f2b52f7ab586..b9a12a19f617a 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -20,6 +20,7 @@ import com.linkedin.datahub.graphql.generated.OwnerInput; import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.generated.ResourceRefInput; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.EntityService; @@ -195,7 +196,12 @@ private static void removeOwnersIfExists( } public static void validateAuthorizedToUpdateOwners( - @Nonnull QueryContext context, Urn resourceUrn) { + @Nonnull QueryContext context, Urn resourceUrn, EntityClient entityClient) { + + if (GlossaryUtils.canUpdateGlossaryEntity(resourceUrn, context, entityClient)) { + return; + } + final DisjunctivePrivilegeGroup orPrivilegeGroups = new DisjunctivePrivilegeGroup( ImmutableList.of( diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java index 1a9272c1335cf..82b40154e07d2 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/domain/BatchSetDomainResolverTest.java @@ -16,6 +16,7 @@ import com.linkedin.datahub.graphql.resolvers.mutate.BatchSetDomainResolver; import com.linkedin.datahub.graphql.resolvers.mutate.MutationUtils; import com.linkedin.domain.Domains; +import com.linkedin.entity.client.EntityClient; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; @@ -40,6 +41,7 @@ public class BatchSetDomainResolverTest { @Test public void testGetSuccessNoExistingDomains() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -67,7 +69,7 @@ public void testGetSuccessNoExistingDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -106,6 +108,7 @@ public void testGetSuccessExistingDomains() throws Exception { .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -133,7 +136,7 @@ public void testGetSuccessExistingDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - 
BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -177,6 +180,7 @@ public void testGetSuccessUnsetDomains() throws Exception { .setDomains(new UrnArray(ImmutableList.of(Urn.createFromString(TEST_DOMAIN_1_URN)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -204,7 +208,7 @@ public void testGetSuccessUnsetDomains() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_2_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -234,6 +238,7 @@ public void testGetSuccessUnsetDomains() throws Exception { @Test public void testGetFailureDomainDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -248,7 +253,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_1_URN)), eq(true))) .thenReturn(false); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -269,6 +274,7 @@ public void testGetFailureDomainDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -292,7 +298,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_DOMAIN_1_URN)), eq(true))) .thenReturn(true); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -313,8 +319,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -335,12 +342,13 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService); + BatchSetDomainResolver resolver = new BatchSetDomainResolver(mockService, 
mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java index b239e0300ffcc..3222cc8c1878f 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/AddOwnersResolverTest.java @@ -20,6 +20,7 @@ import com.linkedin.datahub.graphql.generated.OwnershipType; import com.linkedin.datahub.graphql.resolvers.mutate.AddOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -39,6 +40,7 @@ public class AddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -66,7 +68,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -102,6 +104,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwnerNewType() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -138,7 +141,7 @@ public void testGetSuccessExistingOwnerNewType() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -168,6 +171,7 @@ public void testGetSuccessExistingOwnerNewType() throws Exception { @Test public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -201,7 +205,7 @@ public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -231,6 +235,7 @@ public void testGetSuccessDeprecatedTypeToOwnershipType() throws Exception { @Test public void testGetSuccessMultipleOwnerTypes() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); com.linkedin.common.Ownership oldOwnership = new Ownership() @@ -281,7 +286,7 @@ public void testGetSuccessMultipleOwnerTypes() throws Exception { eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = 
new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -329,6 +334,7 @@ public void testGetSuccessMultipleOwnerTypes() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -343,7 +349,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_1_URN)), eq(true))) .thenReturn(false); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -367,6 +373,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -381,7 +388,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_1_URN)), eq(true))) .thenReturn(true); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -405,8 +412,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - AddOwnersResolver resolver = new AddOwnersResolver(mockService); + AddOwnersResolver resolver = new AddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -430,12 +438,14 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), any(AspectsBatchImpl.class), Mockito.anyBoolean()); - AddOwnersResolver resolver = new AddOwnersResolver(Mockito.mock(EntityService.class)); + AddOwnersResolver resolver = + new AddOwnersResolver(Mockito.mock(EntityService.class), mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java index 8275f9f83ef83..2071b01c10558 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchAddOwnersResolverTest.java @@ -19,6 +19,7 @@ import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.BatchAddOwnersResolver; import com.linkedin.datahub.graphql.resolvers.mutate.util.OwnerUtils; +import com.linkedin.entity.client.EntityClient; 
import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -39,6 +40,7 @@ public class BatchAddOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -77,7 +79,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -127,6 +129,7 @@ public void testGetSuccessExistingOwners() throws Exception { .setOwner(Urn.createFromString(TEST_OWNER_URN_1)) .setType(OwnershipType.TECHNICAL_OWNER)))); EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -176,7 +179,7 @@ public void testGetSuccessExistingOwners() throws Exception { eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -218,6 +221,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureOwnerDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -232,7 +236,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(false); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -268,6 +272,7 @@ public void testGetFailureOwnerDoesNotExist() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -291,7 +296,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(true); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -327,8 +332,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -364,12 +370,13 @@ public void testGetUnauthorized() throws Exception { @Test 
public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService); + BatchAddOwnersResolver resolver = new BatchAddOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java index 9ea9ac693b98e..24380e2e52d84 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/owner/BatchRemoveOwnersResolverTest.java @@ -16,6 +16,7 @@ import com.linkedin.datahub.graphql.generated.BatchRemoveOwnersInput; import com.linkedin.datahub.graphql.generated.ResourceRefInput; import com.linkedin.datahub.graphql.resolvers.mutate.BatchRemoveOwnersResolver; +import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.batch.AspectsBatchImpl; @@ -36,6 +37,7 @@ public class BatchRemoveOwnersResolverTest { @Test public void testGetSuccessNoExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -62,7 +64,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_2)), eq(true))) .thenReturn(true); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -84,6 +86,7 @@ public void testGetSuccessNoExistingOwners() throws Exception { @Test public void testGetSuccessExistingOwners() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); final Ownership oldOwners1 = new Ownership() @@ -129,7 +132,7 @@ public void testGetSuccessExistingOwners() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_2)), eq(true))) .thenReturn(true); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -151,6 +154,7 @@ public void testGetSuccessExistingOwners() throws Exception { @Test public void testGetFailureResourceDoesNotExist() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.when( mockService.getAspect( @@ -174,7 +178,7 @@ public void testGetFailureResourceDoesNotExist() throws Exception { Mockito.when(mockService.exists(any(), eq(Urn.createFromString(TEST_OWNER_URN_1)), eq(true))) .thenReturn(true); - 
BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver QueryContext mockContext = getMockAllowContext(); @@ -196,8 +200,9 @@ public void testGetFailureResourceDoesNotExist() throws Exception { @Test public void testGetUnauthorized() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); @@ -219,12 +224,13 @@ public void testGetUnauthorized() throws Exception { @Test public void testGetEntityClientException() throws Exception { EntityService mockService = getMockEntityService(); + EntityClient mockClient = Mockito.mock(EntityClient.class); Mockito.doThrow(RuntimeException.class) .when(mockService) .ingestProposal(any(), Mockito.any(AspectsBatchImpl.class), Mockito.anyBoolean()); - BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService); + BatchRemoveOwnersResolver resolver = new BatchRemoveOwnersResolver(mockService, mockClient); // Execute resolver DataFetchingEnvironment mockEnv = Mockito.mock(DataFetchingEnvironment.class); From ee2a9781d63796650450e7392077c436f19edd5b Mon Sep 17 00:00:00 2001 From: Tamas Nemeth Date: Fri, 27 Sep 2024 22:50:04 +0200 Subject: [PATCH 8/8] fix(ingest/dagster): Fixing path to the dagster logo (#11489) Co-authored-by: Harshal Sheth --- docs/lineage/openlineage.md | 12 ++++++++---- .../war/src/main/resources/boot/data_platforms.json | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/lineage/openlineage.md b/docs/lineage/openlineage.md index c91aa7499802c..71374a51d578b 100644 --- a/docs/lineage/openlineage.md +++ b/docs/lineage/openlineage.md @@ -56,9 +56,12 @@ Example: "producer": "https://github.com/OpenLineage/OpenLineage/blob/v1-0-0/client" } ``` + ##### How to set up Airflow -Follow the Airflow guide to setup the Airflow DAGs to send lineage information to DataHub. The guide can be found [here](https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html + +Follow the Airflow guide to setup the Airflow DAGs to send lineage information to DataHub. The guide can be found [here](https://airflow.apache.org/docs/apache-airflow-providers-openlineage/stable/guides/user.html). The transport should look like this: + ```json {"type": "http", "url": "https://GMS_SERVER_HOST:GMS_PORT/openapi/openlineage/", @@ -71,12 +74,13 @@ The transport should look like this: ``` #### Known Limitations + With Spark and Airflow we recommend using the Spark Lineage or DataHub's Airflow plugin for tighter integration with DataHub. -- **[PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) Support**: While the REST endpoint supports OpenLineage messages, full [PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns)) support is not yet available. 
+- **[PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) Support**: While the REST endpoint supports OpenLineage messages, full [PathSpec](https://datahubproject.io/docs/metadata-integration/java/acryl-spark-lineage/#configuring-hdfs-based-dataset-urns) support is not yet available in the OpenLineage endpoint, but it is available in the Acryl Spark Plugin. + +etc... - -- **Column-level Lineage**: DataHub's current OpenLineage support does not provide full column-level lineage tracking. - etc... ### 2. Spark Event Listener Plugin DataHub's Spark Event Listener plugin enhances OpenLineage support by providing additional features such as PathSpec support, column-level lineage, and more. diff --git a/metadata-service/war/src/main/resources/boot/data_platforms.json b/metadata-service/war/src/main/resources/boot/data_platforms.json index 0eb6256e3aeee..03f1cf8e6c934 100644 --- a/metadata-service/war/src/main/resources/boot/data_platforms.json +++ b/metadata-service/war/src/main/resources/boot/data_platforms.json @@ -75,7 +75,7 @@ "name": "dagster", "displayName": "Dagster", "type": "OTHERS", - "logoUrl": "/assets/platforms/dagsterlogo.png" + "logoUrl": "/assets/platforms/dagsterlogo.svg" } }, {
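For the Airflow setup referenced in the openlineage.md change above, the transport JSON still has to be handed to the OpenLineage provider through Airflow's configuration. A minimal sketch, assuming the `apache-airflow-providers-openlineage` package and its standard `[openlineage] transport` option; the GMS host/port and API key are placeholders carried over from the example transport:

```shell
# Sketch only: supply the HTTP transport to the Airflow OpenLineage provider via
# Airflow's AIRFLOW__<SECTION>__<KEY> environment-variable convention.
# GMS_SERVER_HOST, GMS_PORT, and the API key below are placeholders, not real values.
export AIRFLOW__OPENLINEAGE__TRANSPORT='{
  "type": "http",
  "url": "https://GMS_SERVER_HOST:GMS_PORT/openapi/openlineage/",
  "endpoint": "api/v1/lineage",
  "auth": {
    "type": "api_key",
    "api_key": "your-datahub-api-key"
  }
}'
```

Setting the same JSON as `transport` under the `[openlineage]` section of `airflow.cfg` should be equivalent; consult the Airflow OpenLineage provider guide linked above for the authoritative option names.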