Skip to content

Commit

Permalink
fix(search): restore prefix phrase match on quoted search (#11444)
Browse files Browse the repository at this point in the history
  • Loading branch information
david-leifker authored Sep 20, 2024
1 parent 2ceb8e0 commit b17d776
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 35 deletions.
2 changes: 1 addition & 1 deletion docker/profiles/docker-compose.gms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
environment: &datahub-gms-dev-env
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml}
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
SKIP_ELASTICSEARCH_CHECK: false
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
Expand Down
4 changes: 2 additions & 2 deletions docs/how/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
If you want to:

- Exact match on term or phrase
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22)
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema)
- ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
- ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.

- Exclude terms
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.linkedin.metadata.search.fixtures;

import static org.testng.AssertJUnit.assertEquals;

import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;

public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
private static final String DEFAULT_CONFIG = "search_config.yaml";
private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
private static final YAMLMapper MAPPER = new YAMLMapper();

/**
* Ensure default search configuration matches the test fixture configuration (allowing for some
* differences)
*/
@Test
public void testConfig() throws IOException {
final CustomSearchConfiguration defaultConfig;
final CustomSearchConfiguration fixtureConfig;

try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) {
defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
}
try (InputStream stream = new ClassPathResource(TEST_FIXTURE_CONFIG).getInputStream()) {
fixtureConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);

// test specifics
((List<Map<String, Object>>)
fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions"))
.remove(1);

((List<Map<String, Object>>)
fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions"))
.remove(1);
}

assertEquals(fixtureConfig, defaultConfig);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -944,7 +944,7 @@ public void testSmokeTestQueries() {
"covid",
2,
"\"raw_orders\"",
6,
1,
STRUCTURED_QUERY_PREFIX + "sample",
3,
STRUCTURED_QUERY_PREFIX + "\"sample\"",
Expand Down Expand Up @@ -1327,24 +1327,24 @@ public void testScrollAcrossEntities() throws IOException {
totalResults += numResults;
scrollId = result.getScrollId();
} while (scrollId != null);
// expect 8 total matching results
assertEquals(totalResults, 8);
// expect 2 total matching results
assertEquals(totalResults, 2);
}

@Test
public void testSearchAcrossMultipleEntities() {
String query = "logging_events";
String query = "logging events";
SearchResult result = search(getOperationContext(), getSearchService(), query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result =
search(
getOperationContext(),
getSearchService(),
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
assertEquals((int) result.getNumEntities(), 2);
result =
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
Expand Down Expand Up @@ -1706,7 +1706,7 @@ public void testOr() {
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}

@Test
Expand All @@ -1729,7 +1729,7 @@ public void testNegate() {
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}

@Test
Expand All @@ -1755,6 +1755,27 @@ public void testPrefix() {
assertEquals(result.getEntities().size(), 8);
}

@Test
public void testQuotedPrefixDescriptionField() {
String query = "\"Constructs the fct_users_deleted\"";
SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);
assertTrue(
result.hasEntities() && !result.getEntities().isEmpty(),
String.format("%s - Expected search results", query));

assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 4);

assertTrue(
result.getEntities().stream()
.allMatch(
e ->
e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
"%s - Expected search results to match on description field based on prefix match");
}

@Test
public void testParens() {
String query = "dbt | (bigquery + covid19)";
Expand Down Expand Up @@ -1878,7 +1899,7 @@ public void testPrefixVsExact() {
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));

assertEquals(result.getEntities().size(), 10);
assertEquals(result.getEntities().size(), 2);
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
Expand Down Expand Up @@ -1937,9 +1958,9 @@ public void testColumnExactMatch() {
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));

assertTrue(
result.getEntities().size() > 2,
String.format("%s - Expected search results to have at least two results", query));
assertFalse(
result.getEntities().isEmpty(),
String.format("%s - Expected search results to have at least 1 result.", query));
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",
Expand Down
84 changes: 66 additions & 18 deletions metadata-io/src/test/resources/search_config_fixture_test.yml
Original file line number Diff line number Diff line change
@@ -1,51 +1,99 @@
# Use for testing with search fixtures
queryConfigurations:
# Select *
# Select */explore all
# Attempt to rank active incidents at the top followed by enrichment factors
- queryRegex: '[*]|'
simpleQuery: false
prefixMatchQuery: false
exactMatchQuery: false
functionScore:
functions:
- filter:
match_all: {}
weight: 1
term:
hasActiveIncidents:
value: true
weight: 2.0
- filter:
term:
hasDescription:
value: true
weight: 1.25
- filter:
term:
hasOwners:
value: true
weight: 1.25
- filter:
term:
hasDomain:
value: true
weight: 1.1
- filter:
term:
hasGlossaryTerms:
value: true
weight: 1.1
- filter:
term:
hasTags:
value: true
weight: 1.1
- filter:
term:
hasRowCount:
value: true
weight: 1.05
- filter:
term:
materialized:
hasColumnCount:
value: true
weight: 0.5
weight: 1.05
- filter:
term:
deprecated:
value: true
weight: 0.5
score_mode: avg
boost_mode: multiply
weight: 0.25
score_mode: multiply
boost_mode: replace

- queryRegex: .*
simpleQuery: true
# Criteria for exact-match only
# Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
match_all: {}
weight: 1
- filter:
term:
materialized:
deprecated:
value: true
weight: 0.5
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: multiply
boost_mode: multiply
# default
- queryRegex: .*
simpleQuery: true
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
term:
deprecated:
value: true
weight: 0.5
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: avg
boost_mode: multiply
score_mode: multiply
boost_mode: multiply
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;

@Builder(toBuilder = true)
@Getter
@EqualsAndHashCode
@ToString
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
public class CustomSearchConfiguration {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ queryConfigurations:
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
Expand Down

0 comments on commit b17d776

Please sign in to comment.