From c5aa72ef5332c8655c1529a6c29c459239b8e04e Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Fri, 20 Aug 2021 11:50:09 -0700 Subject: [PATCH 1/8] Add searchable annotation to maps --- docs/modeling/extending-the-metadata-model.md | 13 ++-- .../models/SearchableFieldSpecExtractor.java | 77 ++++++++++--------- .../annotation/SearchableAnnotation.java | 2 + .../models/EntitySpecBuilderTest.java | 7 +- .../metadata/extractor/FieldExtractor.java | 13 +++- .../com/linkedin/metadata/TestEntityUtil.java | 3 + .../extractor/FieldExtractorTest.java | 1 + .../com/linkedin/common/CustomProperties.pdl | 1 + .../com/datahub/test/TestEntityInfo.pdl | 3 +- 9 files changed, 74 insertions(+), 46 deletions(-) diff --git a/docs/modeling/extending-the-metadata-model.md b/docs/modeling/extending-the-metadata-model.md index 2100832343a50d..221cee62165f71 100644 --- a/docs/modeling/extending-the-metadata-model.md +++ b/docs/modeling/extending-the-metadata-model.md @@ -167,11 +167,11 @@ The Aspect has four key components: its properties, the @Aspect annotation, the references to other entities, of type Urn or optionally `Urn` - The @Aspect annotation. This is used to declare that the record is an Aspect and can be included in an entity’s Snapshot. Unlike the other two annotations, @Aspect is applied to the entire record rather than a specific field. - Note, you can mark an aspect as a timeseries aspect. Check out - this [doc](metadata-model.md#timeseries-aspects) for details. -- The @Searchable annotation. This annotation can be applied to any primitive field to indicate that it should be - indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see the - annotation docs further down in this document. + Note, you can mark an aspect as a timeseries aspect. Check out this [doc](metadata-model.md#timeseries-aspects) for + details. +- The @Searchable annotation. This annotation can be applied to any primitive field or a map field to indicate that it + should be indexed in Elasticsearch and can be searched on. For a complete guide on using the search annotation, see + the annotation docs further down in this document. - The @Relationship annotations. These annotations create edges between the Snapshot’s Urn and the destination of the annotated field when the snapshots are ingested. @Relationship annotations must be applied to fields of type Urn. In the case of DashboardInfo, the `charts` field is an Array of Urns. The @Relationship annotation cannot be applied @@ -398,6 +398,9 @@ ranking. Now, when Datahub ingests Dashboards, it will index the Dashboard’s title in Elasticsearch. When a user searches for Dashboards, that query will be used to search on the title index and matching Dashboards will be returned. +Note, when @Searchable annotation is applied to a map, it will convert it into a list with "key.toString() +=value.toString()" as elements. This allows us to index map fields, while not increasing the number of columns indexed. + #### @Relationship This annotation is applied to fields inside an Aspect. This annotation creates edges between an Entity’s Urn and the diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java index 73784361aff2c8..f64015ca5371ee 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java @@ -38,45 +38,51 @@ public void callbackOnContext(TraverserContext context, DataSchemaTraverse.Order final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema(); - // First, check properties for primary annotation definition. - final Map properties = context.getEnclosingField().getProperties(); - final Object primaryAnnotationObj = properties.get(SearchableAnnotation.ANNOTATION_NAME); + final Object annotationObj = getAnnotationObj(context); - if (primaryAnnotationObj != null) { - validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString()); - } - - // Next, check resolved properties for annotations on primitives. - final Map resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema); - final Object resolvedAnnotationObj = resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME); - - if (resolvedAnnotationObj != null) { + if (annotationObj != null) { if (currentSchema.getDereferencedDataSchema().isComplex()) { final ComplexDataSchema complexSchema = (ComplexDataSchema) currentSchema; if (isValidComplexType(complexSchema)) { - extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context); + extractSearchableAnnotation(annotationObj, currentSchema, context); } } else if (isValidPrimitiveType((PrimitiveDataSchema) currentSchema)) { - extractSearchableAnnotation(resolvedAnnotationObj, currentSchema, context); + extractSearchableAnnotation(annotationObj, currentSchema, context); } else { - throw new ModelValidationException(String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString())); + throw new ModelValidationException( + String.format("Invalid @Searchable Annotation at %s", context.getSchemaPathSpec().toString())); } } } } - private void extractSearchableAnnotation( - final Object annotationObj, - final DataSchema currentSchema, + private Object getAnnotationObj(TraverserContext context) { + final DataSchema currentSchema = context.getCurrentSchema().getDereferencedDataSchema(); + + // First, check properties for primary annotation definition. + final Map properties = context.getEnclosingField().getProperties(); + final Object primaryAnnotationObj = properties.get(SearchableAnnotation.ANNOTATION_NAME); + + if (primaryAnnotationObj != null) { + validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString()); + if (currentSchema.getDereferencedType() == DataSchema.Type.MAP) { + return primaryAnnotationObj; + } + } + + // Next, check resolved properties for annotations on primitives. + final Map resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema); + return resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME); + } + + private void extractSearchableAnnotation(final Object annotationObj, final DataSchema currentSchema, final TraverserContext context) { final PathSpec path = new PathSpec(context.getSchemaPathSpec()); final SearchableAnnotation annotation = - SearchableAnnotation.fromPegasusAnnotationObject( - annotationObj, - FieldSpecUtils.getSchemaFieldName(path), + SearchableAnnotation.fromPegasusAnnotationObject(annotationObj, FieldSpecUtils.getSchemaFieldName(path), currentSchema.getDereferencedType(), path.toString()); - if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) - && !_searchFieldNamesToPatch.get(annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) { + if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) && !_searchFieldNamesToPatch.get( + annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) { throw new ModelValidationException( String.format("Entity has multiple searchable fields with the same field name %s", annotation.getFieldName())); @@ -97,7 +103,8 @@ public SchemaVisitorTraversalResult getSchemaVisitorTraversalResult() { } private Boolean isValidComplexType(final ComplexDataSchema schema) { - return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType()); + return DataSchema.Type.ENUM.equals(schema.getDereferencedDataSchema().getDereferencedType()) + || DataSchema.Type.MAP.equals(schema.getDereferencedDataSchema().getDereferencedType()); } private Boolean isValidPrimitiveType(final PrimitiveDataSchema schema) { @@ -107,7 +114,9 @@ private Boolean isValidPrimitiveType(final PrimitiveDataSchema schema) { private void validatePropertiesAnnotation(DataSchema currentSchema, Object annotationObj, String pathStr) { // If primitive, assume the annotation is well formed until resolvedProperties reflects it. - if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM)) { + if (currentSchema.isPrimitive() || currentSchema.getDereferencedType().equals(DataSchema.Type.ENUM) || currentSchema + .getDereferencedType() + .equals(DataSchema.Type.MAP)) { return; } @@ -115,26 +124,22 @@ private void validatePropertiesAnnotation(DataSchema currentSchema, Object annot if (!Map.class.isAssignableFrom(annotationObj.getClass())) { throw new ModelValidationException(String.format( "Failed to validate @%s annotation declared inside %s: Invalid value type provided (Expected Map)", - SearchableAnnotation.ANNOTATION_NAME, - pathStr - )); + SearchableAnnotation.ANNOTATION_NAME, pathStr)); } Map annotationMap = (Map) annotationObj; if (annotationMap.size() == 0) { - throw new ModelValidationException( - String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.", - pathStr, - currentSchema.getType())); + throw new ModelValidationException(String.format( + "Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.", + pathStr, currentSchema.getType())); } for (String key : annotationMap.keySet()) { if (!key.startsWith(Character.toString(PathSpec.SEPARATOR))) { - throw new ModelValidationException( - String.format("Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.", - pathStr, - currentSchema.getType())); + throw new ModelValidationException(String.format( + "Invalid @Searchable Annotation at %s. Annotation placed on invalid field of type %s. Must be placed on primitive field.", + pathStr, currentSchema.getType())); } } } diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableAnnotation.java b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableAnnotation.java index a42eadea2fe946..0072afb0db6070 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableAnnotation.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/annotation/SearchableAnnotation.java @@ -104,6 +104,8 @@ private static FieldType getDefaultFieldType(DataSchema.Type schemaDataType) { case INT: case FLOAT: return FieldType.COUNT; + case MAP: + return FieldType.KEYWORD; default: return FieldType.TEXT; } diff --git a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java index 5afe5f7705168a..aa9b1ace5ae6e9 100644 --- a/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java +++ b/entity-registry/src/test/java/com/linkedin/metadata/models/EntitySpecBuilderTest.java @@ -114,7 +114,12 @@ private void validateTestEntityInfo(final AspectSpec testEntityInfo) { assertEquals(new TestEntityInfo().schema().getFullName(), testEntityInfo.getPegasusSchema().getFullName()); // Assert on Searchable Fields - assertEquals(7, testEntityInfo.getSearchableFieldSpecs().size()); + assertEquals(8, testEntityInfo.getSearchableFieldSpecs().size()); + assertEquals("customProperties", testEntityInfo.getSearchableFieldSpecMap().get( + new PathSpec("customProperties").toString()).getSearchableAnnotation().getFieldName()); + assertEquals(SearchableAnnotation.FieldType.KEYWORD, testEntityInfo.getSearchableFieldSpecMap().get( + new PathSpec("customProperties").toString()) + .getSearchableAnnotation().getFieldType()); assertEquals("textFieldOverride", testEntityInfo.getSearchableFieldSpecMap().get( new PathSpec("textField").toString()).getSearchableAnnotation().getFieldName()); assertEquals(SearchableAnnotation.FieldType.TEXT, testEntityInfo.getSearchableFieldSpecMap().get( diff --git a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java index 74f883f2b1302a..83b85597b99a94 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java @@ -40,7 +40,15 @@ public static Map> extractFields(RecordTem long numArrayWildcards = getNumArrayWildcards(fieldSpec.getPath()); // Not an array field if (numArrayWildcards == 0) { - extractedFields.put(fieldSpec, Collections.singletonList(value.get())); + // For maps, convert it into a list of the form key=value + if (value.get() instanceof Map) { + extractedFields.put(fieldSpec, ((Map) value.get()).entrySet() + .stream() + .map(entry -> entry.getKey().toString() + "=" + entry.getValue().toString()) + .collect(Collectors.toList())); + } else { + extractedFields.put(fieldSpec, Collections.singletonList(value.get())); + } } else { List valueList = (List) value.get(); // If the field is a nested list of values, flatten it @@ -50,8 +58,7 @@ public static Map> extractFields(RecordTem extractedFields.put(fieldSpec, valueList); } } - } - return extractedFields; + } return extractedFields; } public static Map> extractFieldsFromSnapshot(RecordTemplate snapshot, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java b/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java index 6f46d0bbdbdb2e..82aed002907aa6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/TestEntityUtil.java @@ -11,9 +11,11 @@ import com.datahub.test.TestEntityKey; import com.datahub.test.TestEntitySnapshot; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.TestEntityUrn; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; +import com.linkedin.data.template.StringMap; public class TestEntityUtil { @@ -37,6 +39,7 @@ public static TestEntityInfo getTestEntityInfo(Urn urn) { ImmutableList.of(new SimpleNestedRecord2().setNestedArrayStringField("nestedArray1"), new SimpleNestedRecord2().setNestedArrayStringField("nestedArray2") .setNestedArrayArrayField(new StringArray(ImmutableList.of("testNestedArray1", "testNestedArray2")))))); + testEntityInfo.setCustomProperties(new StringMap(ImmutableMap.of("key1", "value1", "key2", "value2"))); return testEntityInfo; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java index 9bb72a2560a782..33ef9db17283ed 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/extractor/FieldExtractorTest.java @@ -43,5 +43,6 @@ public void testExtractor() { assertEquals(result.get(nameToSpec.get("nestedIntegerField")), ImmutableList.of(1)); assertEquals(result.get(nameToSpec.get("nestedArrayStringField")), ImmutableList.of("nestedArray1", "nestedArray2")); assertEquals(result.get(nameToSpec.get("nestedArrayArrayField")), ImmutableList.of("testNestedArray1", "testNestedArray2")); + assertEquals(result.get(nameToSpec.get("customProperties")), ImmutableList.of("key1=value1", "key2=value2")); } } diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl index 7edd8cde6af554..82a4c0db75b865 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl @@ -7,5 +7,6 @@ record CustomProperties { /** * Custom property bag. */ + @Searchable = {} customProperties: map[string, string] = { } } \ No newline at end of file diff --git a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl index cdd643c4c91efd..7ea5a7e462a9c3 100644 --- a/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl +++ b/test-models/src/main/pegasus/com/datahub/test/TestEntityInfo.pdl @@ -1,6 +1,7 @@ namespace com.datahub.test import com.linkedin.common.Urn +import com.linkedin.common.CustomProperties /** * Info associated with a Test Entity @@ -8,7 +9,7 @@ import com.linkedin.common.Urn @Aspect = { "name": "testEntityInfo" } -record TestEntityInfo { +record TestEntityInfo includes CustomProperties { @Searchable = { "fieldName": "textFieldOverride", From 7ffab0bc8dfde994c4044f983af30f7477db8543 Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Fri, 20 Aug 2021 14:37:30 -0700 Subject: [PATCH 2/8] Fix --- .../java/com/linkedin/metadata/extractor/FieldExtractor.java | 3 ++- .../metadata/search/transformer/SearchDocumentTransformer.java | 2 +- .../search/elasticsearch/indexbuilder/MappingsBuilderTest.java | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java index 83b85597b99a94..8bf6ff71ea4d21 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java @@ -58,7 +58,8 @@ public static Map> extractFields(RecordTem extractedFields.put(fieldSpec, valueList); } } - } return extractedFields; + } + return extractedFields; } public static Map> extractFieldsFromSnapshot(RecordTemplate snapshot, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java index 5c29e4a50ae79e..78674a37823e40 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/transformer/SearchDocumentTransformer.java @@ -102,7 +102,7 @@ public static void setValue(final SearchableFieldSpec fieldSpec, final List getNodeForValue(valueType, value, fieldType).ifPresent(arrayNode::add)); searchDocument.set(fieldName, arrayNode); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java index c20840bf183730..f1d5b28828225b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java @@ -22,7 +22,8 @@ public void testMappingsBuilder() { assertTrue(properties.containsKey("browsePaths")); // KEYWORD assertEquals(properties.get("keyPart3"), ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer")); - + assertEquals(properties.get("customProperties"), + ImmutableMap.of("type", "keyword", "normalizer", "keyword_normalizer")); // TEXT Map nestedArrayStringField = (Map) properties.get("nestedArrayStringField"); assertEquals(nestedArrayStringField.get("type"), "keyword"); From 6ebff2bd6135c0be3d090fadcd80909a8a6e81ab Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Tue, 24 Aug 2021 09:51:42 -0700 Subject: [PATCH 3/8] Fix test --- .../search/elasticsearch/indexbuilder/MappingsBuilderTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java index f1d5b28828225b..a6c9457fcfd1fd 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/MappingsBuilderTest.java @@ -17,7 +17,7 @@ public void testMappingsBuilder() { Map result = MappingsBuilder.getMappings(TestEntitySpecBuilder.getSpec()); assertEquals(result.size(), 1); Map properties = (Map) result.get("properties"); - assertEquals(properties.size(), 11); + assertEquals(properties.size(), 12); assertEquals(properties.get("urn"), ImmutableMap.of("type", "keyword")); assertTrue(properties.containsKey("browsePaths")); // KEYWORD From 1a0df58e258ae0fba7580eaef002d8f63f0963fd Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Tue, 24 Aug 2021 15:32:12 -0700 Subject: [PATCH 4/8] Add tod efault and add length limit --- .../com/linkedin/metadata/extractor/FieldExtractor.java | 8 ++++++-- .../main/pegasus/com/linkedin/common/CustomProperties.pdl | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java index 8bf6ff71ea4d21..c0e6e938703884 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/extractor/FieldExtractor.java @@ -6,6 +6,7 @@ import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.FieldSpec; +import com.linkedin.util.Pair; import java.util.Collections; import java.util.HashMap; import java.util.List; @@ -21,6 +22,7 @@ public class FieldExtractor { private static final String ARRAY_WILDCARD = "*"; + private static final int MAX_VALUE_LENGTH = 200; private FieldExtractor() { } @@ -40,11 +42,13 @@ public static Map> extractFields(RecordTem long numArrayWildcards = getNumArrayWildcards(fieldSpec.getPath()); // Not an array field if (numArrayWildcards == 0) { - // For maps, convert it into a list of the form key=value + // For maps, convert it into a list of the form key=value (Filter out long values) if (value.get() instanceof Map) { extractedFields.put(fieldSpec, ((Map) value.get()).entrySet() .stream() - .map(entry -> entry.getKey().toString() + "=" + entry.getValue().toString()) + .map(entry -> new Pair<>(entry.getKey().toString(), entry.getValue().toString())) + .filter(entry -> entry.getValue().length() < MAX_VALUE_LENGTH) + .map(entry -> entry.getKey() + "=" + entry.getValue()) .collect(Collectors.toList())); } else { extractedFields.put(fieldSpec, Collections.singletonList(value.get())); diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl index 82a4c0db75b865..b9d386ea83d1ea 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl @@ -7,6 +7,8 @@ record CustomProperties { /** * Custom property bag. */ - @Searchable = {} + @Searchable = { + "queryByDefault": "true" + } customProperties: map[string, string] = { } } \ No newline at end of file From 76a02b46af59eb9272117a05fb482c032af0b167 Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Tue, 24 Aug 2021 16:06:34 -0700 Subject: [PATCH 5/8] Fix annotation --- .../src/main/pegasus/com/linkedin/common/CustomProperties.pdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl index b9d386ea83d1ea..7b3423a90ecfae 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl @@ -8,7 +8,7 @@ record CustomProperties { * Custom property bag. */ @Searchable = { - "queryByDefault": "true" + "queryByDefault": true } customProperties: map[string, string] = { } } \ No newline at end of file From b026d8807dd7a6a291dde3e9f4cce4d8756e09bd Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Mon, 30 Aug 2021 18:36:43 -0700 Subject: [PATCH 6/8] In progress --- .../metadata/models/SearchableFieldSpecExtractor.java | 7 ++++--- .../main/pegasus/com/linkedin/common/CustomProperties.pdl | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java index f64015ca5371ee..462d986557c30c 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java @@ -83,9 +83,10 @@ private void extractSearchableAnnotation(final Object annotationObj, final DataS currentSchema.getDereferencedType(), path.toString()); if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) && !_searchFieldNamesToPatch.get( annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) { - throw new ModelValidationException( - String.format("Entity has multiple searchable fields with the same field name %s", - annotation.getFieldName())); + return; + // throw new ModelValidationException( +// String.format("Entity has multiple searchable fields with the same field name %s", +// annotation.getFieldName())); } final SearchableFieldSpec fieldSpec = new SearchableFieldSpec(path, annotation, currentSchema); _specs.add(fieldSpec); diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl index 7b3423a90ecfae..7cac6a3336540e 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl @@ -8,7 +8,9 @@ record CustomProperties { * Custom property bag. */ @Searchable = { - "queryByDefault": true + "/map": { + "queryByDefault": true + } } customProperties: map[string, string] = { } } \ No newline at end of file From aab461e0c1803f05f33ebd9dea5ff57802563e10 Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Fri, 3 Sep 2021 00:07:00 -0700 Subject: [PATCH 7/8] Fixed --- .../models/SearchableFieldSpecExtractor.java | 21 +++++++++++++------ .../com/linkedin/common/CustomProperties.pdl | 2 +- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java index 462d986557c30c..01ea48beb3a3a4 100644 --- a/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java +++ b/entity-registry/src/main/java/com/linkedin/metadata/models/SearchableFieldSpecExtractor.java @@ -24,6 +24,8 @@ public class SearchableFieldSpecExtractor implements SchemaVisitor { private final List _specs = new ArrayList<>(); private final Map _searchFieldNamesToPatch = new HashMap<>(); + private static final String MAP = "map"; + public List getSpecs() { return _specs; } @@ -65,11 +67,19 @@ private Object getAnnotationObj(TraverserContext context) { if (primaryAnnotationObj != null) { validatePropertiesAnnotation(currentSchema, primaryAnnotationObj, context.getTraversePath().toString()); - if (currentSchema.getDereferencedType() == DataSchema.Type.MAP) { - return primaryAnnotationObj; + // Unfortunately, annotations on collections always need to be a nested map (byproduct of making overrides work) + // As such, for annotation maps, we make it a single entry map, where the key has no meaning + if (currentSchema.getDereferencedType() == DataSchema.Type.MAP && primaryAnnotationObj instanceof Map + && !((Map) primaryAnnotationObj).isEmpty()) { + return ((Map) primaryAnnotationObj).entrySet().stream().findFirst().get().getValue(); } } + // Check if the path has map in it. Individual values of the maps (actual maps are caught above) can be ignored + if (context.getTraversePath().contains(MAP)) { + return null; + } + // Next, check resolved properties for annotations on primitives. final Map resolvedProperties = FieldSpecUtils.getResolvedProperties(currentSchema); return resolvedProperties.get(SearchableAnnotation.ANNOTATION_NAME); @@ -83,10 +93,9 @@ private void extractSearchableAnnotation(final Object annotationObj, final DataS currentSchema.getDereferencedType(), path.toString()); if (_searchFieldNamesToPatch.containsKey(annotation.getFieldName()) && !_searchFieldNamesToPatch.get( annotation.getFieldName()).equals(context.getSchemaPathSpec().toString())) { - return; - // throw new ModelValidationException( -// String.format("Entity has multiple searchable fields with the same field name %s", -// annotation.getFieldName())); + throw new ModelValidationException( + String.format("Entity has multiple searchable fields with the same field name %s", + annotation.getFieldName())); } final SearchableFieldSpec fieldSpec = new SearchableFieldSpec(path, annotation, currentSchema); _specs.add(fieldSpec); diff --git a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl index 7cac6a3336540e..8390a05846c83b 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl +++ b/metadata-models/src/main/pegasus/com/linkedin/common/CustomProperties.pdl @@ -8,7 +8,7 @@ record CustomProperties { * Custom property bag. */ @Searchable = { - "/map": { + "/*": { "queryByDefault": true } } From 38a09f149fdde7acda8d43d8a381f1a04d8b32a4 Mon Sep 17 00:00:00 2001 From: Dexter Lee Date: Tue, 7 Sep 2021 11:17:11 -0700 Subject: [PATCH 8/8] Fix tests --- .../elasticsearch/query/request/SearchQueryBuilderTest.java | 3 ++- .../elasticsearch/query/request/SearchRequestHandlerTest.java | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java index 29fbb610b4b768..ff937361c4b753 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java @@ -25,10 +25,11 @@ public void testQueryBuilder() { assertEquals(keywordQuery.queryString(), "testQuery"); assertEquals(keywordQuery.analyzer(), "custom_keyword"); Map keywordFields = keywordQuery.fields(); - assertEquals(keywordFields.size(), 7); + assertEquals(keywordFields.size(), 8); assertEquals(keywordFields.get("keyPart1").floatValue(), 10.0f); assertFalse(keywordFields.containsKey("keyPart3")); assertEquals(keywordFields.get("textFieldOverride").floatValue(), 1.0f); + assertEquals(keywordFields.get("customProperties").floatValue(), 1.0f); QueryStringQueryBuilder textQuery = (QueryStringQueryBuilder) shouldQueries.get(1); assertEquals(textQuery.queryString(), "testQuery"); assertEquals(textQuery.analyzer(), "word_delimited"); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java index cbc0d1f3015eee..63464a01b1bdf9 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java @@ -34,10 +34,10 @@ public void testSearchRequestHandler() { HighlightBuilder highlightBuilder = sourceBuilder.highlighter(); List fields = highlightBuilder.fields().stream().map(HighlightBuilder.Field::name).collect(Collectors.toList()); - assertEquals(fields.size(), 14); + assertEquals(fields.size(), 16); List highlightableFields = ImmutableList.of("keyPart1", "textArrayField", "textFieldOverride", "foreignKey", "nestedForeignKey", - "nestedArrayStringField", "nestedArrayArrayField"); + "nestedArrayStringField", "nestedArrayArrayField", "customProperties"); highlightableFields.forEach(field -> { assertTrue(fields.contains(field)); assertTrue(fields.contains(field + ".*"));