Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(search): restore prefix phrase match on quoted search #11444

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docker/profiles/docker-compose.gms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ x-datahub-gms-service-dev: &datahub-gms-service-dev
- ${DATAHUB_LOCAL_GMS_ENV:-empty2.env}
environment: &datahub-gms-dev-env
<<: [*datahub-dev-telemetry-env, *datahub-gms-env]
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-/etc/datahub/search/search_config.yaml}
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
SKIP_ELASTICSEARCH_CHECK: false
JAVA_TOOL_OPTIONS: '-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001'
BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE: false
Expand Down
4 changes: 2 additions & 2 deletions docs/how/search.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,8 @@ These examples are non exhaustive and using Datasets as a reference.
If you want to:

- Exact match on term or phrase
- ```"datahub_schema"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22datahub_schema%22)
- ```datahub_schema``` [Sample results](https://demo.datahubproject.io/search?page=1&query=datahub_schema)
- ```"pet profile"``` [Sample results](https://demo.datahubproject.io/search?page=1&query=%22pet%20profile%22)
- ```pet profile``` [Sample results](https://demo.datahubproject.io/search?page=1&query=pet%20profile)
- Enclosing one or more terms with double quotes will enforce exact matching on these terms, preventing further tokenization.

- Exclude terms
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.linkedin.metadata.search.fixtures;

import static org.testng.AssertJUnit.assertEquals;

import com.fasterxml.jackson.dataformat.yaml.YAMLMapper;
import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
import org.springframework.core.io.ClassPathResource;
import org.springframework.test.context.testng.AbstractTestNGSpringContextTests;
import org.testng.annotations.Test;

public class SampleDataFixtureSetupTest extends AbstractTestNGSpringContextTests {
private static final String DEFAULT_CONFIG = "search_config.yaml";
private static final String TEST_FIXTURE_CONFIG = "search_config_fixture_test.yml";
private static final YAMLMapper MAPPER = new YAMLMapper();

/**
* Ensure default search configuration matches the test fixture configuration (allowing for some
* differences)
*/
@Test
public void testConfig() throws IOException {
final CustomSearchConfiguration defaultConfig;
final CustomSearchConfiguration fixtureConfig;

try (InputStream stream = new ClassPathResource(DEFAULT_CONFIG).getInputStream()) {
defaultConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);
}
try (InputStream stream = new ClassPathResource(TEST_FIXTURE_CONFIG).getInputStream()) {
fixtureConfig = MAPPER.readValue(stream, CustomSearchConfiguration.class);

// test specifics
((List<Map<String, Object>>)
fixtureConfig.getQueryConfigurations().get(1).getFunctionScore().get("functions"))
.remove(1);

((List<Map<String, Object>>)
fixtureConfig.getQueryConfigurations().get(2).getFunctionScore().get("functions"))
.remove(1);
}

assertEquals(fixtureConfig, defaultConfig);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -944,7 +944,7 @@ public void testSmokeTestQueries() {
"covid",
2,
"\"raw_orders\"",
6,
1,
STRUCTURED_QUERY_PREFIX + "sample",
3,
STRUCTURED_QUERY_PREFIX + "\"sample\"",
Expand Down Expand Up @@ -1327,24 +1327,24 @@ public void testScrollAcrossEntities() throws IOException {
totalResults += numResults;
scrollId = result.getScrollId();
} while (scrollId != null);
// expect 8 total matching results
assertEquals(totalResults, 8);
// expect 2 total matching results
assertEquals(totalResults, 2);
}

@Test
public void testSearchAcrossMultipleEntities() {
String query = "logging_events";
String query = "logging events";
SearchResult result = search(getOperationContext(), getSearchService(), query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result =
search(
getOperationContext(),
getSearchService(),
List.of(DATASET_ENTITY_NAME, DATA_JOB_ENTITY_NAME),
query);
assertEquals((int) result.getNumEntities(), 8);
assertEquals((int) result.getNumEntities(), 6);
result = search(getOperationContext(), getSearchService(), List.of(DATASET_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
assertEquals((int) result.getNumEntities(), 2);
result =
search(getOperationContext(), getSearchService(), List.of(DATA_JOB_ENTITY_NAME), query);
assertEquals((int) result.getNumEntities(), 4);
Expand Down Expand Up @@ -1706,7 +1706,7 @@ public void testOr() {
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}

@Test
Expand All @@ -1729,7 +1729,7 @@ public void testNegate() {
assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 8);
assertEquals(result.getEntities().size(), 2);
}

@Test
Expand All @@ -1755,6 +1755,27 @@ public void testPrefix() {
assertEquals(result.getEntities().size(), 8);
}

@Test
public void testQuotedPrefixDescriptionField() {
String query = "\"Constructs the fct_users_deleted\"";
SearchResult result = searchAcrossEntities(getOperationContext(), getSearchService(), query);
assertTrue(
result.hasEntities() && !result.getEntities().isEmpty(),
String.format("%s - Expected search results", query));

assertTrue(
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));
assertEquals(result.getEntities().size(), 4);

assertTrue(
result.getEntities().stream()
.allMatch(
e ->
e.getMatchedFields().stream().anyMatch(m -> m.getName().equals("description"))),
"%s - Expected search results to match on description field based on prefix match");
}

@Test
public void testParens() {
String query = "dbt | (bigquery + covid19)";
Expand Down Expand Up @@ -1878,7 +1899,7 @@ public void testPrefixVsExact() {
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));

assertEquals(result.getEntities().size(), 10);
assertEquals(result.getEntities().size(), 2);
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:dbt,cypress_project.jaffle_shop.customers,PROD)",
Expand Down Expand Up @@ -1937,9 +1958,9 @@ public void testColumnExactMatch() {
result.getEntities().stream().noneMatch(e -> e.getMatchedFields().isEmpty()),
String.format("%s - Expected search results to include matched fields", query));

assertTrue(
result.getEntities().size() > 2,
String.format("%s - Expected search results to have at least two results", query));
assertFalse(
result.getEntities().isEmpty(),
String.format("%s - Expected search results to have at least 1 result.", query));
assertEquals(
result.getEntities().get(0).getEntity().toString(),
"urn:li:dataset:(urn:li:dataPlatform:testOnly," + "important_units" + ",PROD)",
Expand Down
84 changes: 66 additions & 18 deletions metadata-io/src/test/resources/search_config_fixture_test.yml
Original file line number Diff line number Diff line change
@@ -1,51 +1,99 @@
# Use for testing with search fixtures
queryConfigurations:
# Select *
# Select */explore all
# Attempt to rank active incidents at the top followed by enrichment factors
- queryRegex: '[*]|'
simpleQuery: false
prefixMatchQuery: false
exactMatchQuery: false
functionScore:
functions:
- filter:
match_all: {}
weight: 1
term:
hasActiveIncidents:
value: true
weight: 2.0
- filter:
term:
hasDescription:
value: true
weight: 1.25
- filter:
term:
hasOwners:
value: true
weight: 1.25
- filter:
term:
hasDomain:
value: true
weight: 1.1
- filter:
term:
hasGlossaryTerms:
value: true
weight: 1.1
- filter:
term:
hasTags:
value: true
weight: 1.1
- filter:
term:
hasRowCount:
value: true
weight: 1.05
- filter:
term:
materialized:
hasColumnCount:
value: true
weight: 0.5
weight: 1.05
- filter:
term:
deprecated:
value: true
weight: 0.5
score_mode: avg
boost_mode: multiply
weight: 0.25
score_mode: multiply
boost_mode: replace

- queryRegex: .*
simpleQuery: true
# Criteria for exact-match only
# Contains quotes, is a single term with `_`, `.`, or `-` (normally consider for tokenization) then use exact match query
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
match_all: {}
weight: 1
- filter:
term:
materialized:
deprecated:
value: true
weight: 0.5
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: multiply
boost_mode: multiply

# default
- queryRegex: .*
simpleQuery: true
prefixMatchQuery: true
exactMatchQuery: true
functionScore:
functions:
- filter:
term:
deprecated:
value: true
weight: 0.5
weight: 0.25
- filter:
terms:
tags:
- urn:li:tag:pii
weight: 1.25
score_mode: avg
boost_mode: multiply
score_mode: multiply
boost_mode: multiply
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
import lombok.Builder;
import lombok.EqualsAndHashCode;
import lombok.Getter;
import lombok.ToString;

@Builder(toBuilder = true)
@Getter
@EqualsAndHashCode
@ToString
@JsonDeserialize(builder = CustomSearchConfiguration.CustomSearchConfigurationBuilder.class)
public class CustomSearchConfiguration {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ queryConfigurations:
- queryRegex: >-
^["'].+["']$|^[a-zA-Z0-9]\S+[_.-]\S+[a-zA-Z0-9]$
simpleQuery: false
prefixMatchQuery: false
prefixMatchQuery: true
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the impactful change here

exactMatchQuery: true
functionScore:
functions:
Expand Down
Loading