Skip to content

Commit 6067396

Browse files
committed
Refactor SemanticVersionFieldMapper for flexible index, store, and doc_values support
Signed-off-by: Siddhant Deshmukh <deshsid@amazon.com>
1 parent a7add4a commit 6067396

File tree

3 files changed: +171 additions, -79 deletions

3 files changed: +171 additions, -79 deletions

server/src/main/java/org/opensearch/index/mapper/SemanticVersion.java

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@
2323
* @see <a href="https://github.com/opensearch-project/OpenSearch/issues/16814">OpenSearch github issue</a>
2424
*/
2525
public class SemanticVersion implements Comparable<SemanticVersion> {
26+
27+
// Regex used to check SemVer string. Source: https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string
28+
private static final String SEMANTIC_VERSION_REGEX =
29+
"^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$";
2630
private final int major;
2731
private final int minor;
2832
private final int patch;
@@ -74,9 +78,7 @@ public static SemanticVersion parse(String version) {
7478
version = version.replaceAll("\\s+", ".");
7579
}
7680

77-
Pattern pattern = Pattern.compile(
78-
"^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
79-
);
81+
Pattern pattern = Pattern.compile(SEMANTIC_VERSION_REGEX);
8082

8183
Matcher matcher = pattern.matcher(version);
8284
if (!matcher.matches()) {

server/src/main/java/org/opensearch/index/mapper/SemanticVersionFieldMapper.java

Lines changed: 138 additions & 57 deletions
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@
1616
import org.apache.lucene.index.DocValuesType;
1717
import org.apache.lucene.index.IndexOptions;
1818
import org.apache.lucene.index.Term;
19-
import org.apache.lucene.search.FieldExistsQuery;
2019
import org.apache.lucene.search.FuzzyQuery;
2120
import org.apache.lucene.search.MultiTermQuery;
2221
import org.apache.lucene.search.PrefixQuery;
@@ -54,7 +53,7 @@ public class SemanticVersionFieldMapper extends ParametrizedFieldMapper {
5453

5554
static {
5655
FIELD_TYPE.setTokenized(false);
57-
FIELD_TYPE.setStored(true);
56+
FIELD_TYPE.setStored(false);
5857
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
5958
FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET);
6059
FIELD_TYPE.freeze();
@@ -84,7 +83,13 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults,
8483
*/
8584
public static class Builder extends ParametrizedFieldMapper.Builder {
8685
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
87-
private final Parameter<Boolean> docValues = Parameter.docValuesParam(m -> true, true);
86+
private final Parameter<Boolean> indexed = Parameter.indexParam(m -> toType(m).isSearchable, true).alwaysSerialize();
87+
private final Parameter<Boolean> hasDocValues = Parameter.docValuesParam(m -> toType(m).hasDocValues, true);
88+
private final Parameter<Boolean> stored = Parameter.storeParam(m -> toType(m).isStored, false);
89+
90+
private static SemanticVersionFieldType toType(FieldMapper m) {
91+
return (SemanticVersionFieldType) ((ParametrizedFieldMapper) m).mappedFieldType;
92+
}
8893

8994
public Builder(String name) {
9095
super(name);
@@ -93,17 +98,31 @@ public Builder(String name) {
9398
@Override
9499
protected List<Parameter<?>> getParameters() {
95100
List<Parameter<?>> parameters = new ArrayList<>();
101+
parameters.add(indexed);
102+
parameters.add(hasDocValues);
103+
parameters.add(stored);
96104
parameters.add(meta);
97-
parameters.add(docValues);
98105
return parameters;
99106
}
100107

101108
@Override
102109
public SemanticVersionFieldMapper build(BuilderContext context) {
110+
FieldType fieldType = new FieldType();
111+
fieldType.setTokenized(false);
112+
fieldType.setStored(stored.getValue());
113+
fieldType.setIndexOptions(indexed.getValue() ? IndexOptions.DOCS : IndexOptions.NONE);
114+
fieldType.setDocValuesType(hasDocValues.getValue() ? DocValuesType.SORTED_SET : DocValuesType.NONE);
115+
fieldType.freeze();
103116
return new SemanticVersionFieldMapper(
104117
name,
105-
FIELD_TYPE,
106-
new SemanticVersionFieldType(buildFullName(context), meta.getValue()),
118+
fieldType,
119+
new SemanticVersionFieldType(
120+
buildFullName(context),
121+
meta.getValue(),
122+
indexed.getValue(),
123+
hasDocValues.getValue(),
124+
stored.getValue()
125+
),
107126
multiFieldsBuilder.build(this, context),
108127
copyTo.build(),
109128
meta.getValue()
@@ -121,60 +140,77 @@ public SemanticVersionFieldMapper build(BuilderContext context) {
121140
public static class SemanticVersionFieldType extends TermBasedFieldType {
122141
private final Map<String, String> meta;
123142
private final String normalizedFieldName;
124-
125-
public SemanticVersionFieldType(String name, Map<String, String> meta) {
126-
super(name, true, true, true, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
143+
private final boolean isSearchable;
144+
private final boolean hasDocValues;
145+
private final boolean isStored;
146+
147+
public SemanticVersionFieldType(
148+
String name,
149+
Map<String, String> meta,
150+
boolean isSearchable,
151+
boolean hasDocValues,
152+
boolean isStored
153+
) {
154+
super(name, isSearchable, isStored, hasDocValues, TextSearchInfo.SIMPLE_MATCH_ONLY, meta);
127155
this.meta = meta;
128-
this.normalizedFieldName = name + ".normalized";
156+
this.normalizedFieldName = name + NORMALIZED_FIELD_SUFFIX;
157+
this.isSearchable = isSearchable;
158+
this.hasDocValues = hasDocValues;
159+
this.isStored = isStored;
129160
}
130161

131162
@Override
132163
public String typeName() {
133164
return CONTENT_TYPE;
134165
}
135166

136-
@Override
137-
public Query existsQuery(QueryShardContext context) {
138-
return new FieldExistsQuery(name());
139-
}
140-
141167
@Override
142168
public Query termQuery(Object value, QueryShardContext context) {
143169
if (value == null) {
144170
throw new IllegalArgumentException("Cannot search for null value");
145171
}
146-
147-
BytesRef bytes;
148-
if (value instanceof BytesRef) {
149-
bytes = (BytesRef) value;
172+
BytesRef bytes = value instanceof BytesRef ? (BytesRef) value : new BytesRef(value.toString());
173+
Query indexQuery = isSearchable ? new TermQuery(new Term(name(), bytes)) : null;
174+
Query dvQuery = hasDocValues ? SortedSetDocValuesField.newSlowExactQuery(normalizedFieldName, bytes) : null;
175+
if (indexQuery != null && dvQuery != null) {
176+
return new org.apache.lucene.search.IndexOrDocValuesQuery(indexQuery, dvQuery);
177+
} else if (indexQuery != null) {
178+
return indexQuery;
179+
} else if (dvQuery != null) {
180+
return dvQuery;
150181
} else {
151-
bytes = new BytesRef(value.toString());
182+
throw new IllegalArgumentException("Field [" + name() + "] is neither indexed nor has doc_values enabled");
152183
}
153-
154-
return new TermQuery(new Term(name(), bytes));
155184
}
156185

157186
@Override
158187
public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) {
159-
160188
try {
161189
BytesRef lower = null;
162190
BytesRef upper = null;
163-
164191
if (lowerTerm != null) {
165192
String lowerStr = (lowerTerm instanceof BytesRef) ? ((BytesRef) lowerTerm).utf8ToString() : lowerTerm.toString();
166193
SemanticVersion lowerVersion = SemanticVersion.parse(lowerStr);
167194
lower = new BytesRef(lowerVersion.getNormalizedComparableString());
168195
}
169-
170196
if (upperTerm != null) {
171197
String upperStr = (upperTerm instanceof BytesRef) ? ((BytesRef) upperTerm).utf8ToString() : upperTerm.toString();
172198
SemanticVersion upperVersion = SemanticVersion.parse(upperStr);
173199
upper = new BytesRef(upperVersion.getNormalizedComparableString());
174200
}
175-
176-
return new TermRangeQuery(name() + NORMALIZED_FIELD_SUFFIX, lower, upper, includeLower, includeUpper);
177-
201+
Query indexQuery = isSearchable ? new TermRangeQuery(normalizedFieldName, lower, upper, includeLower, includeUpper) : null;
202+
Query dvQuery = hasDocValues
203+
? SortedSetDocValuesField.newSlowRangeQuery(normalizedFieldName, lower, upper, includeLower, includeUpper)
204+
: null;
205+
if (indexQuery != null && dvQuery != null) {
206+
return new org.apache.lucene.search.IndexOrDocValuesQuery(indexQuery, dvQuery);
207+
} else if (indexQuery != null) {
208+
return indexQuery;
209+
} else if (dvQuery != null) {
210+
return dvQuery;
211+
} else {
212+
throw new IllegalArgumentException("Field [" + name() + "] is neither indexed nor has doc_values enabled");
213+
}
178214
} catch (Exception e) {
179215
throw new QueryShardException(
180216
context,
@@ -189,6 +225,25 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower
189225
}
190226
}
191227

228+
@Override
229+
public Query termsQuery(List<?> values, QueryShardContext context) {
230+
List<BytesRef> bytesList = new ArrayList<>();
231+
for (Object value : values) {
232+
bytesList.add(value instanceof BytesRef ? (BytesRef) value : new BytesRef(value.toString()));
233+
}
234+
Query indexQuery = isSearchable ? new org.apache.lucene.search.TermInSetQuery(name(), bytesList) : null;
235+
Query dvQuery = hasDocValues ? SortedSetDocValuesField.newSlowSetQuery(normalizedFieldName, bytesList) : null;
236+
if (indexQuery != null && dvQuery != null) {
237+
return new org.apache.lucene.search.IndexOrDocValuesQuery(indexQuery, dvQuery);
238+
} else if (indexQuery != null) {
239+
return indexQuery;
240+
} else if (dvQuery != null) {
241+
return dvQuery;
242+
} else {
243+
throw new IllegalArgumentException("Field [" + name() + "] is neither indexed nor has doc_values enabled");
244+
}
245+
}
246+
192247
@Override
193248
public Query regexpQuery(
194249
String value,
@@ -198,35 +253,45 @@ public Query regexpQuery(
198253
MultiTermQuery.RewriteMethod method,
199254
QueryShardContext context
200255
) {
201-
202256
if (method == null) {
203-
method = MultiTermQuery.CONSTANT_SCORE_REWRITE; // default rewrite method
257+
method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
258+
}
259+
if (isSearchable) {
260+
return new RegexpQuery(
261+
new Term(name(), indexedValueForSearch(value)),
262+
syntaxFlags,
263+
matchFlags,
264+
RegexpQuery.DEFAULT_PROVIDER,
265+
maxDeterminizedStates,
266+
method
267+
);
268+
} else {
269+
throw new IllegalArgumentException("Regexp queries require the field to be indexed");
204270
}
205-
206-
return new RegexpQuery(
207-
new Term(name(), indexedValueForSearch(value)),
208-
syntaxFlags,
209-
matchFlags,
210-
RegexpQuery.DEFAULT_PROVIDER,
211-
maxDeterminizedStates,
212-
method
213-
);
214271
}
215272

216273
@Override
217274
public Query wildcardQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
218275
if (caseInsensitive) {
219276
value = value.toLowerCase(Locale.ROOT);
220277
}
221-
return new WildcardQuery(new Term(name(), indexedValueForSearch(value)), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
278+
if (isSearchable) {
279+
return new WildcardQuery(new Term(name(), indexedValueForSearch(value)), Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
280+
} else {
281+
throw new IllegalArgumentException("Wildcard queries require the field to be indexed");
282+
}
222283
}
223284

224285
@Override
225286
public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, boolean caseInsensitive, QueryShardContext context) {
226287
if (method == null) {
227-
method = MultiTermQuery.CONSTANT_SCORE_REWRITE; // Default rewrite method
288+
method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
289+
}
290+
if (isSearchable) {
291+
return new PrefixQuery(new Term(name(), indexedValueForSearch(value)), method);
292+
} else {
293+
throw new IllegalArgumentException("Prefix queries require the field to be indexed");
228294
}
229-
return new PrefixQuery(new Term(name(), indexedValueForSearch(value)), method);
230295
}
231296

232297
@Override
@@ -239,23 +304,28 @@ public Query fuzzyQuery(
239304
MultiTermQuery.RewriteMethod method,
240305
QueryShardContext context
241306
) {
242-
243307
if (method == null) {
244308
method = MultiTermQuery.CONSTANT_SCORE_REWRITE;
245309
}
246-
247-
return new FuzzyQuery(
248-
new Term(name(), indexedValueForSearch(value)),
249-
fuzziness.asDistance(),
250-
prefixLength,
251-
maxExpansions,
252-
transpositions,
253-
method
254-
);
310+
if (isSearchable) {
311+
return new FuzzyQuery(
312+
new Term(name(), indexedValueForSearch(value)),
313+
fuzziness.asDistance(),
314+
prefixLength,
315+
maxExpansions,
316+
transpositions,
317+
method
318+
);
319+
} else {
320+
throw new IllegalArgumentException("Fuzzy queries require the field to be indexed");
321+
}
255322
}
256323

257324
@Override
258325
public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchLookup, String format) {
326+
if (format != null) {
327+
throw new IllegalArgumentException("Field [" + name() + "] of type [" + typeName() + "] doesn't support formats.");
328+
}
259329
return new SourceValueFetcher(name(), context, format) {
260330
@Override
261331
protected String parseSourceValue(Object value) {
@@ -266,7 +336,10 @@ protected String parseSourceValue(Object value) {
266336

267337
@Override
268338
public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, Supplier<SearchLookup> searchLookup) {
269-
return new SortedSetOrdinalsIndexFieldData.Builder(name() + "._normalized", CoreValuesSourceType.BYTES);
339+
if (!hasDocValues) {
340+
throw new IllegalArgumentException("Field [" + name() + "] does not have doc_values enabled");
341+
}
342+
return new SortedSetOrdinalsIndexFieldData.Builder(normalizedFieldName, CoreValuesSourceType.BYTES);
270343
}
271344

272345
@Override
@@ -289,14 +362,22 @@ protected void parseCreateField(ParseContext context) throws IOException {
289362
BytesRef normalizedValueBytes = new BytesRef(normalizedValue);
290363

291364
// For retrieval: store original version string
292-
context.doc().add(new StoredField(fieldType().name(), versionString));
365+
if (fieldType().isStored()) {
366+
context.doc().add(new StoredField(fieldType().name(), versionString));
367+
}
293368

294369
// For searching (term queries): use original version string
295-
context.doc().add(new KeywordField(fieldType().name(), bytes, Field.Store.YES));
370+
if (fieldType().isSearchable()) {
371+
context.doc().add(new KeywordField(fieldType().name(), bytes, this.fieldType.stored() ? Field.Store.YES : Field.Store.NO));
372+
}
296373

297374
// For range queries and sorting: use normalized form
298-
context.doc().add(new KeywordField(fieldType().name() + NORMALIZED_FIELD_SUFFIX, normalizedValueBytes, Field.Store.NO));
299-
context.doc().add(new SortedSetDocValuesField(fieldType().name() + NORMALIZED_FIELD_SUFFIX, normalizedValueBytes));
375+
if (fieldType().hasDocValues() || fieldType().isSearchable()) {
376+
context.doc().add(new KeywordField(fieldType().name() + NORMALIZED_FIELD_SUFFIX, normalizedValueBytes, Field.Store.NO));
377+
}
378+
if (fieldType().hasDocValues()) {
379+
context.doc().add(new SortedSetDocValuesField(fieldType().name() + NORMALIZED_FIELD_SUFFIX, normalizedValueBytes));
380+
}
300381
}
301382

302383
@Override

0 commit comments

Comments
 (0)