diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index d5b7e90ef4..100d491bc0 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -34,7 +34,7 @@ The Apache Commons library has been removed as a dependency. There were a few lo * **Breaking change** Change 2 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) * **Breaking change** Change 3 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) * **Breaking change** Change 4 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) -* **Breaking change** Change 5 [(Issue #NNN)](https://github.com/FoundationDB/fdb-record-layer/issues/NNN) +* **Breaking change** AnalyzerChooser no longer takes the text when choosing an analyzer [(Issue #2993)](https://github.com/FoundationDB/fdb-record-layer/issues/2993) // end next release --> diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/AnalyzerChooser.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/AnalyzerChooser.java index 7ce9348e1c..7699b81eba 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/AnalyzerChooser.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/AnalyzerChooser.java @@ -23,18 +23,11 @@ import org.apache.lucene.analysis.Analyzer; import javax.annotation.Nonnull; -import java.util.Collections; -import java.util.List; /** - * Choose an {@link Analyzer} given texts. + * Choose an {@link Analyzer}. 
*/ public interface AnalyzerChooser { @Nonnull - default LuceneAnalyzerWrapper chooseAnalyzer(@Nonnull String text) { - return chooseAnalyzer(Collections.singletonList(text)); - } - - @Nonnull - LuceneAnalyzerWrapper chooseAnalyzer(@Nonnull List texts); + LuceneAnalyzerWrapper chooseAnalyzer(); } diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/EmailCjkSynonymAnalyzerFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/EmailCjkSynonymAnalyzerFactory.java index e02dbad568..a6b7687b75 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/EmailCjkSynonymAnalyzerFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/EmailCjkSynonymAnalyzerFactory.java @@ -65,7 +65,7 @@ public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { final String minLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MIN_SIZE)).orElse(DEFAULT_MINIMUM_TOKEN_LENGTH); final String maxLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MAX_SIZE)).orElse(Integer.toString(UAX29URLEmailAnalyzer.DEFAULT_MAX_TOKEN_LENGTH)); - return t -> new LuceneAnalyzerWrapper(UNIQUE_IDENTIFIER, + return () -> new LuceneAnalyzerWrapper(UNIQUE_IDENTIFIER, new EmailCjkSynonymAnalyzer(MINIMAL_STOP_WORDS, 1, Integer.parseInt(minLengthString), Integer.parseInt(maxLengthString), true, false, null)); } catch (NumberFormatException ex) { @@ -81,7 +81,7 @@ public AnalyzerChooser getQueryAnalyzerChooser(@Nonnull Index index, @Nonnull An final String minLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MIN_SIZE)).orElse(DEFAULT_MINIMUM_TOKEN_LENGTH); final String maxLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MAX_SIZE)).orElse(DEFAULT_MAXIMUM_TOKEN_LENGTH); final String synonymConfigName = index.getOption(LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION); - return t -> 
new LuceneAnalyzerWrapper(UNIQUE_IDENTIFIER, + return () -> new LuceneAnalyzerWrapper(UNIQUE_IDENTIFIER, new EmailCjkSynonymAnalyzer(MINIMAL_STOP_WORDS, 1, Integer.parseInt(minLengthString), Integer.parseInt(maxLengthString), true, synonymConfigName != null, synonymConfigName != null ? SynonymMapRegistryImpl.instance().getSynonymMap(synonymConfigName) : null)); } catch (NumberFormatException ex) { diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerCombinationProvider.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerCombinationProvider.java index 179e56bbe0..f1dca5d27f 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerCombinationProvider.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerCombinationProvider.java @@ -24,8 +24,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; -import java.util.Collections; -import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.TreeMap; @@ -37,47 +35,39 @@ * The default analyzer chooser is used for all fields of one Lucene index except the fields which has overrides in the analyzer chooser per field mapping. 
*/ public class LuceneAnalyzerCombinationProvider { - public static final String DELINEATOR_BETWEEN_KEY_AND_VALUE = ":"; - public static final String DELINEATOR_BETWEEN_KEY_VALUE_PAIRS = ","; - private AnalyzerChooser defaultIndexAnalyzerChooser; - private AnalyzerChooser defaultQueryAnalyzerChooser; - private Map indexAnalyzerChooserPerFieldOverride; - private Map queryAnalyzerChooserPerFieldOverride; + @Nonnull + private final LuceneAnalyzerWrapper indexAnalyzerWrapper; + @Nonnull + private final LuceneAnalyzerWrapper queryAnalyzerWrapper; - public LuceneAnalyzerCombinationProvider(@Nonnull AnalyzerChooser defaultIndexAnalyzerChooser, @Nonnull AnalyzerChooser defaultQueryAnalyzerChooser, - @Nullable Map indexAnalyzerChooserPerFieldOverride, @Nullable Map queryAnalyzerChooserPerFieldOverride) { - this.defaultIndexAnalyzerChooser = defaultIndexAnalyzerChooser; - this.defaultQueryAnalyzerChooser = defaultQueryAnalyzerChooser; - this.indexAnalyzerChooserPerFieldOverride = indexAnalyzerChooserPerFieldOverride; - this.queryAnalyzerChooserPerFieldOverride = queryAnalyzerChooserPerFieldOverride; + public LuceneAnalyzerCombinationProvider(@Nonnull AnalyzerChooser defaultIndexAnalyzerChooser, + @Nonnull AnalyzerChooser defaultQueryAnalyzerChooser, + @Nullable Map indexAnalyzerChooserPerFieldOverride, + @Nullable Map queryAnalyzerChooserPerFieldOverride) { + indexAnalyzerWrapper = buildAnalyzerWrapper(defaultIndexAnalyzerChooser, indexAnalyzerChooserPerFieldOverride); + queryAnalyzerWrapper = buildAnalyzerWrapper(defaultQueryAnalyzerChooser, queryAnalyzerChooserPerFieldOverride); } - public LuceneAnalyzerWrapper provideIndexAnalyzer(@Nonnull String text) { - return provideIndexAnalyzer(Collections.singletonList(text)); + @Nonnull + public LuceneAnalyzerWrapper provideIndexAnalyzer() { + return indexAnalyzerWrapper; } - public LuceneAnalyzerWrapper provideIndexAnalyzer(@Nonnull List texts) { - return buildAnalyzerWrapper(texts, defaultIndexAnalyzerChooser, 
indexAnalyzerChooserPerFieldOverride); - } - - public LuceneAnalyzerWrapper provideQueryAnalyzer(@Nonnull String text) { - return provideQueryAnalyzer(Collections.singletonList(text)); - } - - public LuceneAnalyzerWrapper provideQueryAnalyzer(@Nonnull List texts) { - return buildAnalyzerWrapper(texts, defaultQueryAnalyzerChooser, queryAnalyzerChooserPerFieldOverride); + @Nonnull + public LuceneAnalyzerWrapper provideQueryAnalyzer() { + return queryAnalyzerWrapper; } + @Nonnull @SuppressWarnings("PMD.CloseResource") - private static LuceneAnalyzerWrapper buildAnalyzerWrapper(@Nonnull List texts, - @Nonnull AnalyzerChooser defaultAnalyzerChooser, + private static LuceneAnalyzerWrapper buildAnalyzerWrapper(@Nonnull AnalyzerChooser defaultAnalyzerChooser, @Nullable Map customizedAnalyzerChooserPerField) { - final LuceneAnalyzerWrapper defaultAnalyzerWrapper = defaultAnalyzerChooser.chooseAnalyzer(texts); + final LuceneAnalyzerWrapper defaultAnalyzerWrapper = defaultAnalyzerChooser.chooseAnalyzer(); if (customizedAnalyzerChooserPerField != null) { // The order of keys matters because the identifier for each map needs to be consistent SortedMap analyzerWrapperMap = new TreeMap<>(customizedAnalyzerChooserPerField.entrySet().stream() - .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().chooseAnalyzer(texts)))); + .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().chooseAnalyzer()))); PerFieldAnalyzerWrapper analyzerWrapper = new PerFieldAnalyzerWrapper(defaultAnalyzerWrapper.getAnalyzer(), analyzerWrapperMap.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().getAnalyzer()))); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerFactory.java index bc47f0158c..c965b70ad1 100644 --- 
a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerFactory.java @@ -75,6 +75,6 @@ public interface LuceneAnalyzerFactory { */ @Nonnull default AnalyzerChooser getQueryAnalyzerChooser(@Nonnull Index index, @Nonnull AnalyzerChooser indexAnalyzerChooser) { - return t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(); + return LuceneAnalyzerWrapper::getStandardAnalyzerWrapper; } } diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryImpl.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryImpl.java index a52d1cbbcc..62e47ee7bf 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryImpl.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryImpl.java @@ -26,7 +26,7 @@ import com.apple.foundationdb.record.metadata.Index; import com.apple.foundationdb.record.metadata.MetaDataException; import com.apple.foundationdb.record.util.ServiceLoaderProvider; -import com.apple.foundationdb.record.util.pair.Pair; +import com.apple.foundationdb.record.util.pair.NonnullPair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -96,14 +96,14 @@ public LuceneAnalyzerCombinationProvider getLuceneAnalyzerCombinationProvider(@N @Nonnull final Map auxiliaryFieldInfo) { final String defaultAnalyzerName = index.getOption(type.getAnalyzerOptionKey()); final String analyzerPerFieldName = index.getOption(type.getAnalyzerPerFieldOptionKey()); - Pair defaultAnalyzerChooserPair = getAnalyzerChooser(index, defaultAnalyzerName, type); + NonnullPair defaultAnalyzerChooserPair = getAnalyzerChooser(index, defaultAnalyzerName, type); Map indexAnalyzerChooserPerFieldOverride = new TreeMap<>(); Map queryAnalyzerChooserPerFieldOverride = 
new TreeMap<>(); if (analyzerPerFieldName != null) { LuceneIndexOptions.parseKeyValuePairOptionValue(analyzerPerFieldName).forEach((fieldName, analyzerName) -> { - Pair perFieldAnalyzerChooserPair = getAnalyzerChooser(index, analyzerName, type); + NonnullPair perFieldAnalyzerChooserPair = getAnalyzerChooser(index, analyzerName, type); indexAnalyzerChooserPerFieldOverride.put(fieldName, perFieldAnalyzerChooserPair.getLeft()); queryAnalyzerChooserPerFieldOverride.put(fieldName, perFieldAnalyzerChooserPair.getRight()); }); @@ -145,11 +145,11 @@ private static boolean isEligibleForNoOpAnalyzer(@Nonnull final LuceneIndexExpre return fieldInfo.getType() != LuceneIndexExpressions.DocumentFieldType.TEXT; } - private Pair getAnalyzerChooser(@Nonnull Index index, @Nullable String analyzerName, @Nonnull LuceneAnalyzerType type) { + private NonnullPair getAnalyzerChooser(@Nonnull Index index, @Nullable String analyzerName, @Nonnull LuceneAnalyzerType type) { final Map registryForType = Objects.requireNonNullElse(registry.get(type), Collections.emptyMap()); if (analyzerName == null || !registryForType.containsKey(analyzerName)) { - return Pair.of(t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(), - t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper()); + return NonnullPair.of(LuceneAnalyzerWrapper::getStandardAnalyzerWrapper, + LuceneAnalyzerWrapper::getStandardAnalyzerWrapper); } else { LuceneAnalyzerFactory analyzerFactory = registryForType.get(analyzerName); if (analyzerFactory == null) { @@ -158,7 +158,7 @@ private Pair getAnalyzerChooser(@Nonnull Index LuceneLogMessageKeys.ANALYZER_TYPE, type.name()); } final AnalyzerChooser indexAnalyzerChooser = analyzerFactory.getIndexAnalyzerChooser(index); - return Pair.of(indexAnalyzerChooser, analyzerFactory.getQueryAnalyzerChooser(index, indexAnalyzerChooser)); + return NonnullPair.of(indexAnalyzerChooser, analyzerFactory.getQueryAnalyzerChooser(index, indexAnalyzerChooser)); } } } diff --git 
a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteAnalyzerFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteAnalyzerFactory.java index 7972e14235..c48d9e8ae8 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteAnalyzerFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteAnalyzerFactory.java @@ -48,6 +48,6 @@ public LuceneAnalyzerType getType() { @Nonnull @Override public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { - return t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(); + return LuceneAnalyzerWrapper::getStandardAnalyzerWrapper; } } diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteQueryClause.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteQueryClause.java index 12e283b699..cbed1e06a3 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteQueryClause.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneAutoCompleteQueryClause.java @@ -122,12 +122,11 @@ public BoundQuery bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index inde final Map pointsConfigMap = LuceneIndexExpressions.constructPointConfigMap(store, index); LuceneQueryParserFactory parserFactory = LuceneQueryParserFactoryProvider.instance().getParserFactory(); final QueryParser parser = parserFactory.createMultiFieldQueryParser(fields.toArray(new String[0]), - analyzerSelector.provideIndexAnalyzer(searchKey).getAnalyzer(), pointsConfigMap); + analyzerSelector.provideIndexAnalyzer().getAnalyzer(), pointsConfigMap); - final var finalQuery = phraseQueryNeeded - ? 
buildQueryForPhraseMatching(parser, fields, searchKey) - : buildQueryForTermsMatching(analyzerSelector.provideIndexAnalyzer(searchKey).getAnalyzer(), fields, searchKey); + final Query finalQuery; + finalQuery = phraseQueryNeeded ? buildQueryForPhraseMatching(parser, fields, searchKey) : buildQueryForTermsMatching(analyzerSelector.provideIndexAnalyzer().getAnalyzer(), fields, searchKey); if (LOGGER.isDebugEnabled()) { LOGGER.debug(KeyValueLogMessage.build("query for auto-complete") .addKeyAndValue(LogMessageKeys.INDEX_NAME, index.getName()) diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java index 7fb25ac366..f32b105978 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintainer.java @@ -112,7 +112,7 @@ public class LuceneIndexMaintainer extends StandardIndexMaintainer { private static final Logger LOG = LoggerFactory.getLogger(LuceneIndexMaintainer.class); private final FDBDirectoryManager directoryManager; - private final LuceneAnalyzerCombinationProvider indexAnalyzerSelector; + private final LuceneAnalyzerCombinationProvider queryAnalyzerSelector; private final LuceneAnalyzerCombinationProvider autoCompleteAnalyzerSelector; public static final String PRIMARY_KEY_FIELD_NAME = "_p"; protected static final String PRIMARY_KEY_SEARCH_NAME = "_s"; @@ -128,7 +128,7 @@ public LuceneIndexMaintainer(@Nonnull final IndexMaintainerState state, @Nonnull this.executor = executor; this.directoryManager = createDirectoryManager(state); final var fieldInfos = LuceneIndexExpressions.getDocumentFieldDerivations(state.index, state.store.getRecordMetaData()); - this.indexAnalyzerSelector = 
LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(state.index, LuceneAnalyzerType.FULL_TEXT, fieldInfos); + this.queryAnalyzerSelector = LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(state.index, LuceneAnalyzerType.FULL_TEXT, fieldInfos); this.autoCompleteAnalyzerSelector = LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(state.index, LuceneAnalyzerType.AUTO_COMPLETE, fieldInfos); String formatString = state.index.getOption(LuceneIndexOptions.PRIMARY_KEY_SERIALIZATION_FORMAT); keySerializer = LuceneIndexKeySerializer.fromStringFormat(formatString); @@ -175,7 +175,7 @@ public RecordCursor scan(@Nonnull final IndexScanBounds scanBounds, state.context.getPropertyStorage().getPropertyValue(LuceneRecordContextProperties.LUCENE_INDEX_CURSOR_PAGE_SIZE), scanProperties, state, scanQuery.getQuery(), scanQuery.getSort(), continuation, scanQuery.getGroupKey(), partitionInfo, scanQuery.getLuceneQueryHighlightParameters(), scanQuery.getTermMap(), - scanQuery.getStoredFields(), scanQuery.getStoredFieldTypes(), indexAnalyzerSelector, autoCompleteAnalyzerSelector); + scanQuery.getStoredFields(), scanQuery.getStoredFieldTypes(), queryAnalyzerSelector, autoCompleteAnalyzerSelector); } if (scanType.equals(LuceneScanTypes.BY_LUCENE_SPELL_CHECK)) { @@ -184,7 +184,8 @@ public RecordCursor scan(@Nonnull final IndexScanBounds scanBounds, } LuceneScanSpellCheck scanSpellcheck = (LuceneScanSpellCheck)scanBounds; return new LuceneSpellCheckRecordCursor(scanSpellcheck.getFields(), scanSpellcheck.getWord(), - executor, scanProperties, state, scanSpellcheck.getGroupKey(), partitioner.selectQueryPartitionId(scanSpellcheck.getGroupKey())); + executor, scanProperties, state, scanSpellcheck.getGroupKey(), + partitioner.selectQueryPartitionId(scanSpellcheck.getGroupKey())); } throw new RecordCoreException("unsupported scan type for Lucene index: " + scanType); @@ -255,7 +256,7 @@ private void writeDocument(@Nonnull List 
.filter(f -> f.getType().equals(LuceneIndexExpressions.DocumentFieldType.TEXT)) .map(f -> (String) f.getValue()).collect(Collectors.toList()); Document document = new Document(); - final IndexWriter newWriter = directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer(texts)); + final IndexWriter newWriter = directoryManager.getIndexWriter(groupingKey, partitionId); BytesRef ref = new BytesRef(keySerializer.asPackedByteArray(primaryKey)); // use packed Tuple for the Stored and Sorted fields @@ -297,7 +298,7 @@ private Map> getIndex @SuppressWarnings({"PMD.CloseResource", "java:S2095"}) int deleteDocument(Tuple groupingKey, Integer partitionId, Tuple primaryKey) throws IOException { final long startTime = System.nanoTime(); - final IndexWriter indexWriter = directoryManager.getIndexWriter(groupingKey, partitionId, indexAnalyzerSelector.provideIndexAnalyzer("")); + final IndexWriter indexWriter = directoryManager.getIndexWriter(groupingKey, partitionId); @Nullable final LucenePrimaryKeySegmentIndex segmentIndex = directoryManager.getDirectory(groupingKey, partitionId).getPrimaryKeySegmentIndex(); if (segmentIndex != null) { @@ -358,7 +359,7 @@ public CompletableFuture mergeIndex() { return rebalancePartitions() .thenCompose(ignored -> { state.store.getIndexDeferredMaintenanceControl().setLastStep(IndexDeferredMaintenanceControl.LastStep.MERGE); - return directoryManager.mergeIndex(partitioner, indexAnalyzerSelector.provideIndexAnalyzer("")); + return directoryManager.mergeIndex(partitioner); }); } @@ -366,8 +367,7 @@ public CompletableFuture mergeIndex() { public void mergeIndexForTesting(@Nonnull final Tuple groupingKey, @Nullable final Integer partitionId, @Nonnull final AgilityContext agilityContext) throws IOException { - directoryManager.mergeIndexWithContext(indexAnalyzerSelector.provideIndexAnalyzer(""), - groupingKey, partitionId, agilityContext); + directoryManager.mergeIndexWithContext(groupingKey, partitionId, 
agilityContext); } @Nonnull diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java index 804b4e66f7..d6242883a4 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryFieldComparisonClause.java @@ -279,7 +279,7 @@ public BoundQuery bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index inde try { final var fieldInfos = LuceneIndexExpressions.getDocumentFieldDerivations(index, store.getRecordMetaData()); final LuceneAnalyzerCombinationProvider analyzerSelector = LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(index, LuceneAnalyzerType.FULL_TEXT, fieldInfos); - final QueryParser parser = new QueryParser(field, analyzerSelector.provideQueryAnalyzer((String) comparand).getAnalyzer()); + final QueryParser parser = new QueryParser(field, analyzerSelector.provideQueryAnalyzer().getAnalyzer()); return toBoundQuery(parser.parse("\"" + comparand + "\"")); } catch (Exception ex) { throw new RecordCoreArgumentException("Unable to parse phrase for query", ex); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryMultiFieldSearchClause.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryMultiFieldSearchClause.java index feebdd56d4..32471c15df 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryMultiFieldSearchClause.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQueryMultiFieldSearchClause.java @@ -72,7 +72,7 @@ public BoundQuery bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index inde final Map pointsConfigMap = 
LuceneIndexExpressions.constructPointConfigMap(store, index); LuceneQueryParserFactory parserFactory = LuceneQueryParserFactoryProvider.instance().getParserFactory(); final QueryParser parser = parserFactory.createMultiFieldQueryParser(fieldNames, - analyzerSelector.provideQueryAnalyzer(searchString).getAnalyzer(), pointsConfigMap); + analyzerSelector.provideQueryAnalyzer().getAnalyzer(), pointsConfigMap); try { return toBoundQuery(parser.parse(searchString)); } catch (final Exception ioe) { diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQuerySearchClause.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQuerySearchClause.java index b144630148..8dedfa700b 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQuerySearchClause.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/LuceneQuerySearchClause.java @@ -82,7 +82,7 @@ public BoundQuery bind(@Nonnull FDBRecordStoreBase store, @Nonnull Index inde final Map pointsConfigMap = LuceneIndexExpressions.constructPointConfigMap(store, index); LuceneQueryParserFactory parserFactory = LuceneQueryParserFactoryProvider.instance().getParserFactory(); - final QueryParser parser = parserFactory.createQueryParser(defaultField, analyzerSelector.provideQueryAnalyzer(searchString).getAnalyzer(), pointsConfigMap); + final QueryParser parser = parserFactory.createQueryParser(defaultField, analyzerSelector.provideQueryAnalyzer().getAnalyzer(), pointsConfigMap); try { return toBoundQuery(parser.parse(searchString)); } catch (Exception ioe) { diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryManager.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryManager.java index b5a6d7d9a7..a83befa974 100644 --- 
a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryManager.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/directory/FDBDirectoryManager.java @@ -30,8 +30,11 @@ import com.apple.foundationdb.record.ScanProperties; import com.apple.foundationdb.record.cursors.ChainedCursor; import com.apple.foundationdb.record.logging.KeyValueLogMessage; +import com.apple.foundationdb.record.lucene.LuceneAnalyzerRegistryImpl; +import com.apple.foundationdb.record.lucene.LuceneAnalyzerType; import com.apple.foundationdb.record.lucene.LuceneAnalyzerWrapper; import com.apple.foundationdb.record.lucene.LuceneExceptions; +import com.apple.foundationdb.record.lucene.LuceneIndexExpressions; import com.apple.foundationdb.record.lucene.LuceneIndexTypes; import com.apple.foundationdb.record.lucene.LuceneLogMessageKeys; import com.apple.foundationdb.record.lucene.LucenePartitionInfoProto; @@ -81,6 +84,7 @@ public class FDBDirectoryManager implements AutoCloseable { private final Map createdDirectories; private final int mergeDirectoryCount; private final Exception exceptionAtCreation; + private final LuceneAnalyzerWrapper writerAnalyzer; protected FDBDirectoryManager(@Nonnull IndexMaintainerState state) { this.state = state; @@ -91,6 +95,9 @@ protected FDBDirectoryManager(@Nonnull IndexMaintainerState state) { } else { this.exceptionAtCreation = null; } + final var fieldInfos = LuceneIndexExpressions.getDocumentFieldDerivations(state.index, state.store.getRecordMetaData()); + final var analyzerSelector = LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(state.index, LuceneAnalyzerType.FULL_TEXT, fieldInfos); + this.writerAnalyzer = analyzerSelector.provideIndexAnalyzer(); } @Override @@ -103,7 +110,7 @@ public synchronized void close() throws IOException { } @SuppressWarnings("PMD.CloseResource") - public CompletableFuture mergeIndex(@Nonnull LucenePartitioner partitioner, 
LuceneAnalyzerWrapper analyzerWrapper) { + public CompletableFuture mergeIndex(@Nonnull LucenePartitioner partitioner) { // This function will iterate the grouping keys and explicitly merge each final ScanProperties scanProperties = ScanProperties.FORWARD_SCAN.with( @@ -120,7 +127,7 @@ public CompletableFuture mergeIndex(@Nonnull LucenePartitioner partitioner if (! (rootExpression instanceof GroupingKeyExpression)) { // Here: empty grouping keys tuple - return mergeIndex(analyzerWrapper, TupleHelpers.EMPTY, partitioner, agilityContext) + return mergeIndex(TupleHelpers.EMPTY, partitioner, agilityContext) .whenComplete((ignore, ex) -> closeOrAbortAgilityContext(agilityContext, ex)); } // Here: iterate the grouping keys and merge each @@ -142,19 +149,19 @@ public CompletableFuture mergeIndex(@Nonnull LucenePartitioner partitioner // It may make sense in the future to make these concurrent, but there is enough complexity that it is // better to avoid the concurrent merges. // This also reduces the amount of load that a single store can cause on a system. - .forEachAsync(groupingKey -> mergeIndex(analyzerWrapper, groupingKey, partitioner, agilityContext), + .forEachAsync(groupingKey -> mergeIndex(groupingKey, partitioner, agilityContext), 1) .whenComplete((ignore, ex) -> closeOrAbortAgilityContext(agilityContext, ex)); } - private CompletableFuture mergeIndex(LuceneAnalyzerWrapper analyzerWrapper, Tuple groupingKey, + private CompletableFuture mergeIndex(Tuple groupingKey, @Nonnull LucenePartitioner partitioner, final AgilityContext agileContext) { // Note: We always flush before calls to `mergeIndexNow` because we won't come back to get the next partition // or group until after the merge which could be many seconds later, in which case the current transaction would // no longer be valid. 
It may make sense to have AgilityContext.Agile commit periodically regardless of activity if (!partitioner.isPartitioningEnabled()) { agileContext.flush(); - mergeIndexNow(analyzerWrapper, groupingKey, null); + mergeIndexNow(groupingKey, null); return AsyncUtil.DONE; } else { // Here: iterate the partition ids and merge each @@ -166,16 +173,16 @@ private CompletableFuture mergeIndex(LuceneAnalyzerWrapper analyzerWrapper return false; } agileContext.flush(); - mergeIndexNow(analyzerWrapper, groupingKey, partitionId); + mergeIndexNow(groupingKey, partitionId); return true; })); } } - private void mergeIndexNow(LuceneAnalyzerWrapper analyzerWrapper, Tuple groupingKey, @Nullable final Integer partitionId) { + private void mergeIndexNow(Tuple groupingKey, @Nullable final Integer partitionId) { final AgilityContext agilityContext = getAgilityContext(true, true); try { - mergeIndexWithContext(analyzerWrapper, groupingKey, partitionId, agilityContext); + mergeIndexWithContext(groupingKey, partitionId, agilityContext); } finally { // IndexWriter may release the file lock in a finally block in its own code, so if there is an error in its // code, we need to commit. 
We could optimize this a bit, and have it only flush if it has committed anything @@ -184,13 +191,12 @@ private void mergeIndexNow(LuceneAnalyzerWrapper analyzerWrapper, Tuple grouping } } - public void mergeIndexWithContext(@Nonnull final LuceneAnalyzerWrapper analyzerWrapper, - @Nonnull final Tuple groupingKey, - @Nullable final Integer partitionId, - @Nonnull final AgilityContext agilityContext) { + public void mergeIndexWithContext(@Nonnull final Tuple groupingKey, + @Nullable final Integer partitionId, + @Nonnull final AgilityContext agilityContext) { try (FDBDirectoryWrapper directoryWrapper = createDirectoryWrapper(groupingKey, partitionId, agilityContext)) { try { - directoryWrapper.mergeIndex(analyzerWrapper, exceptionAtCreation); + directoryWrapper.mergeIndex(writerAnalyzer, exceptionAtCreation); if (LOGGER.isDebugEnabled()) { LOGGER.debug(KeyValueLogMessage.of("Lucene merge success", LuceneLogMessageKeys.GROUP, groupingKey, @@ -350,8 +356,8 @@ public IndexReader getIndexReader(@Nullable Tuple groupingKey, @Nullable Integer } @Nonnull - public IndexWriter getIndexWriter(@Nullable Tuple groupingKey, @Nullable Integer partitionId, @Nonnull LuceneAnalyzerWrapper analyzerWrapper) throws IOException { - return getDirectoryWrapper(groupingKey, partitionId).getWriter(analyzerWrapper, exceptionAtCreation); + public IndexWriter getIndexWriter(@Nullable Tuple groupingKey, @Nullable Integer partitionId) throws IOException { + return getDirectoryWrapper(groupingKey, partitionId).getWriter(writerAnalyzer, exceptionAtCreation); } public DirectoryReader getDirectoryReader(@Nullable Tuple groupingKey, @Nullable Integer partititonId) throws IOException { diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/exact/ExactTokenAnalyzerFactory.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/exact/ExactTokenAnalyzerFactory.java index 9fca9dfa6f..7f21a76bcb 100644 --- 
a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/exact/ExactTokenAnalyzerFactory.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/exact/ExactTokenAnalyzerFactory.java @@ -28,7 +28,6 @@ import com.google.auto.service.AutoService; import javax.annotation.Nonnull; -import java.util.List; /** * Constructs a new instance of {@link ExactTokenAnalyzer}. @@ -59,7 +58,7 @@ public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index ignored) { return new AnalyzerChooser() { @Nonnull @Override - public LuceneAnalyzerWrapper chooseAnalyzer(@Nonnull List ignored) { + public LuceneAnalyzerWrapper chooseAnalyzer() { return new LuceneAnalyzerWrapper(UNIQUE_NAME, new ExactTokenAnalyzer()); } }; @@ -71,7 +70,7 @@ public AnalyzerChooser getQueryAnalyzerChooser(@Nonnull Index ignored, @Nonnull return new AnalyzerChooser() { @Nonnull @Override - public LuceneAnalyzerWrapper chooseAnalyzer(@Nonnull List ignored) { + public LuceneAnalyzerWrapper chooseAnalyzer() { return new LuceneAnalyzerWrapper(UNIQUE_NAME, new ExactTokenAnalyzer()); } }; diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/highlight/LuceneHighlighting.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/highlight/LuceneHighlighting.java index be09b0adb5..736b9ec89b 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/highlight/LuceneHighlighting.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/highlight/LuceneHighlighting.java @@ -170,7 +170,7 @@ private static Object highlight(final @Nonnull LuceneAnalyzerCombinationProvider final String fieldName, final String value, final String termName) { - final LuceneAnalyzerWrapper queryAnalyzer = analyzerSelector.provideIndexAnalyzer(value); + final LuceneAnalyzerWrapper queryAnalyzer = analyzerSelector.provideIndexAnalyzer(); UnifiedHighlighter highlighter = 
makeHighlighter(fieldName, queryAnalyzer.getAnalyzer(), luceneQueryHighlightParameters.getSnippedSize()); try { return highlighter.highlightWithoutSearcher(termName, luceneQueryHighlightParameters.getQuery(), value, luceneQueryHighlightParameters.getMaxMatchCount()); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/ngram/NgramAnalyzer.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/ngram/NgramAnalyzer.java index 49e981341d..fac5356a81 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/ngram/NgramAnalyzer.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/ngram/NgramAnalyzer.java @@ -108,7 +108,7 @@ public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { final String minLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MIN_SIZE)).orElse(DEFAULT_MINIMUM_NGRAM_TOKEN_LENGTH); final String maxLengthString = Optional.ofNullable(index.getOption(IndexOptions.TEXT_TOKEN_MAX_SIZE)).orElse(DEFAULT_MAXIMUM_NGRAM_TOKEN_LENGTH); final String edgesOnly = Optional.ofNullable(index.getOption(LuceneIndexOptions.NGRAM_TOKEN_EDGES_ONLY)).orElse(DEFAULT_NGRAM_WITH_EDGES_ONLY); - return t -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, + return () -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, new NgramAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, Integer.parseInt(minLengthString), Integer.parseInt(maxLengthString), Boolean.parseBoolean(edgesOnly))); } catch (NumberFormatException ex) { throw new RecordCoreArgumentException("Invalid index option for token size", ex); diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/package-info.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/package-info.java index d718a4abcd..86e58107ed 100644 --- 
a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/package-info.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/package-info.java @@ -24,7 +24,7 @@ *

* Lucene indexes are backed by FDB, using {@link com.apple.foundationdb.record.lucene.directory.FDBDirectory} to implement a virtual file system holding the inverted index files. * This is not fundamental, though. This maintainer used standard {@link org.apache.lucene.index.IndexWriter} and {@link org.apache.lucene.index.IndexReader}, gotten with - * {@link com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager#getIndexWriter(com.apple.foundationdb.tuple.Tuple, java.lang.Integer, com.apple.foundationdb.record.lucene.LuceneAnalyzerWrapper)} getIndexWriter}, + * {@link com.apple.foundationdb.record.lucene.directory.FDBDirectoryManager#getIndexWriter(com.apple.foundationdb.tuple.Tuple, java.lang.Integer) getIndexWriter}, * for interfacing to Lucene. *

* diff --git a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/synonym/SynonymAnalyzer.java b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/synonym/SynonymAnalyzer.java index 813c06763e..f6b6f62f9a 100644 --- a/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/synonym/SynonymAnalyzer.java +++ b/fdb-record-layer-lucene/src/main/java/com/apple/foundationdb/record/lucene/synonym/SynonymAnalyzer.java @@ -129,7 +129,7 @@ public LuceneAnalyzerType getType() { @Nonnull @Override public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { - return t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(); + return LuceneAnalyzerWrapper::getStandardAnalyzerWrapper; } @SuppressWarnings("deprecation") @@ -138,7 +138,7 @@ public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { public AnalyzerChooser getQueryAnalyzerChooser(@Nonnull Index index, @Nonnull AnalyzerChooser indexAnalyzerChooser) { final String name = Objects.requireNonNullElse(index.getOption(LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION), EnglishSynonymMapConfig.ExpandedEnglishSynonymMapConfig.CONFIG_NAME); - return t -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, + return () -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, new SynonymAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, name)); } } @@ -169,7 +169,7 @@ public LuceneAnalyzerType getType() { public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { final String name = Objects.requireNonNullElse(index.getOption(LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION), EnglishSynonymMapConfig.AuthoritativeOnlyEnglishSynonymMapConfig.CONFIG_NAME); - return t -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, + return () -> new LuceneAnalyzerWrapper(ANALYZER_FACTORY_NAME, new SynonymAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, name)); } diff --git 
a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryTest.java index 01156902a3..c372431821 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneAnalyzerRegistryTest.java @@ -47,9 +47,9 @@ void searchForAutoCompleteWithSynonymEnabledOnFullText() { LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION, EnglishSynonymMapConfig.ExpandedEnglishSynonymMapConfig.CONFIG_NAME)); // Assert the synonym analyzer is used for query analyzer for full-text search Assertions.assertEquals(SynonymAnalyzer.QueryOnlySynonymAnalyzerFactory.ANALYZER_FACTORY_NAME, - LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(index, LuceneAnalyzerType.FULL_TEXT, Map.of()).provideQueryAnalyzer("").getUniqueIdentifier()); + LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(index, LuceneAnalyzerType.FULL_TEXT, Map.of()).provideQueryAnalyzer().getUniqueIdentifier()); // Assert the standard analyzer is used for query analyzer for auto-complete suggestions Assertions.assertEquals(LuceneAnalyzerWrapper.STANDARD_ANALYZER_NAME, - LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(index, LuceneAnalyzerType.AUTO_COMPLETE, Map.of()).provideQueryAnalyzer("").getUniqueIdentifier()); + LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(index, LuceneAnalyzerType.AUTO_COMPLETE, Map.of()).provideQueryAnalyzer().getUniqueIdentifier()); } } diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneDocumentFromRecordTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneDocumentFromRecordTest.java index 58b79cc8df..c774d05873 100644 --- 
a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneDocumentFromRecordTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneDocumentFromRecordTest.java @@ -55,8 +55,9 @@ */ class LuceneDocumentFromRecordTest { - private LuceneAnalyzerCombinationProvider analyzerProvider = new LuceneAnalyzerCombinationProvider(t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(), - t -> LuceneAnalyzerWrapper.getStandardAnalyzerWrapper(), + private final LuceneAnalyzerCombinationProvider analyzerProvider = new LuceneAnalyzerCombinationProvider( + LuceneAnalyzerWrapper::getStandardAnalyzerWrapper, + LuceneAnalyzerWrapper::getStandardAnalyzerWrapper, null, null); @Test diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintenanceTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintenanceTest.java index ebf8dc8d32..7a9f43834b 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintenanceTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexMaintenanceTest.java @@ -733,7 +733,7 @@ void mergeLosesLockTest(int failurePercentage) throws IOException { final var fieldInfos = LuceneIndexExpressions.getDocumentFieldDerivations(state.index, state.store.getRecordMetaData()); LuceneAnalyzerCombinationProvider indexAnalyzerSelector = LuceneAnalyzerRegistryImpl.instance().getLuceneAnalyzerCombinationProvider(state.index, LuceneAnalyzerType.FULL_TEXT, fieldInfos); - assertThrows(IOException.class, () -> fdbDirectoryWrapper.mergeIndex(indexAnalyzerSelector.provideIndexAnalyzer(""), new Exception()), "invalid lock"); + assertThrows(IOException.class, () -> fdbDirectoryWrapper.mergeIndex(indexAnalyzerSelector.provideIndexAnalyzer(), new Exception()), "invalid lock"); commit(context); } } diff --git 
a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTest.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTest.java index edccaccdec..598ff3bec8 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTest.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTest.java @@ -147,7 +147,6 @@ import static com.apple.foundationdb.record.lucene.LuceneIndexOptions.INDEX_PARTITION_HIGH_WATERMARK; import static com.apple.foundationdb.record.lucene.LuceneIndexOptions.INDEX_PARTITION_LOW_WATERMARK; import static com.apple.foundationdb.record.lucene.LuceneIndexOptions.PRIMARY_KEY_SEGMENT_INDEX_V2_ENABLED; -import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.ANALYZER_CHOOSER_TEST_LUCENE_INDEX_KEY; import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.AUTHORITATIVE_SYNONYM_ONLY_LUCENE_INDEX_KEY; import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.AUTO_COMPLETE_SIMPLE_LUCENE_INDEX_KEY; import static com.apple.foundationdb.record.lucene.LuceneIndexTestUtils.COMBINED_SYNONYM_SETS; @@ -311,10 +310,6 @@ private void metaDataHookSyntheticRecordComplexJoinedToMap(RecordMetaDataBuilder metaDataBuilder.addIndex(joined, index); } - private static final Index ANALYZER_CHOOSER_TEST_LUCENE_INDEX = new Index("analyzer_chooser_test_index", function(LuceneFunctionNames.LUCENE_TEXT, field("text")), LuceneIndexTypes.LUCENE, - ImmutableMap.of( - LuceneIndexOptions.LUCENE_ANALYZER_NAME_OPTION, LuceneIndexTestUtils.TestAnalyzerFactory.ANALYZER_FACTORY_NAME)); - private static final Index JOINED_INDEX_NOGROUP = getJoinedIndexNoGroup(Map.of( INDEX_PARTITION_BY_FIELD_NAME, "complex.timestamp", INDEX_PARTITION_HIGH_WATERMARK, "10")); @@ -5670,35 +5665,6 @@ void testSimpleAutoComplete(IndexedType indexedType) { } } - @ParameterizedTest - @MethodSource(LUCENE_INDEX_MAP_PARAMS) - void 
analyzerChooserTest(IndexedType indexedType) { - final Index index = indexedType.getIndex(ANALYZER_CHOOSER_TEST_LUCENE_INDEX_KEY); - try (FDBRecordContext context = openContext()) { - if (indexedType.isSynthetic()) { - openRecordStore(context, metaDataBuilder -> metaDataHookSyntheticRecordComplexJoinedToSimple(metaDataBuilder, index)); - // Synonym analyzer is chosen due to the keyword "synonym" from the text - createComplexRecordJoinedToSimple(1, 1623L, 1623L, "synonym food", "", false, System.currentTimeMillis(), 0); - // Ngram analyzer is chosen due to no keyword "synonym" from the text - createComplexRecordJoinedToSimple(2, 1624L, 1624L, "ngram motivation", "", false, System.currentTimeMillis(), 1); - } else { - rebuildIndexMetaData(context, SIMPLE_DOC, index); - // Synonym analyzer is chosen due to the keyword "synonym" from the text - recordStore.saveRecord(createSimpleDocument(1623L, "synonym food", 1)); - // Ngram analyzer is chosen due to no keyword "synonym" from the text - recordStore.saveRecord(createSimpleDocument(1624L, "ngram motivation", 1)); - } - assertEquals(1, recordStore.scanIndex(index, fullTextSearch(index, "nutrient"), null, ScanProperties.FORWARD_SCAN) - .getCount().join()); - assertEquals(0, recordStore.scanIndex(index, fullTextSearch(index, "foo"), null, ScanProperties.FORWARD_SCAN) - .getCount().join()); - assertEquals(0, recordStore.scanIndex(index, fullTextSearch(index, "need"), null, ScanProperties.FORWARD_SCAN) - .getCount().join()); - assertEquals(1, recordStore.scanIndex(index, fullTextSearch(index, "motivatio"), null, ScanProperties.FORWARD_SCAN) - .getCount().join()); - } - } - @ParameterizedTest @MethodSource(LUCENE_INDEX_MAP_PARAMS) void basicLuceneCursorTest(IndexedType indexedType) { diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestUtils.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestUtils.java index c1b187940d..99b14b04df 
100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestUtils.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestUtils.java @@ -47,12 +47,10 @@ import com.apple.foundationdb.record.query.plan.cascades.debug.Debugger; import com.apple.foundationdb.record.query.plan.debug.DebuggerWithSymbolTables; import com.apple.foundationdb.record.util.pair.Pair; -import com.google.auto.service.AutoService; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; -import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.search.Sort; import javax.annotation.Nonnull; @@ -108,7 +106,6 @@ public class LuceneIndexTestUtils { protected static final String SPELLCHECK_INDEX_COMPLEX_KEY = "spellcheck_index_complex_key"; protected static final String COMPLEX_MULTIPLE_GROUPED_KEY = "complex_multiple_grouped_key"; protected static final String COMPLEX_MULTI_GROUPED_WITH_AUTO_COMPLETE_KEY = "complex_multi_grouped_with_auto_complete_key"; - protected static final String ANALYZER_CHOOSER_TEST_LUCENE_INDEX_KEY = "analyzer_chooser_test_lucene_index_key"; protected static final String AUTO_COMPLETE_SIMPLE_LUCENE_INDEX_KEY = "auto_complete_simple_lucene_index_key"; public static final KeyExpression SIMPLE_TEXT_WITH_AUTO_COMPLETE_STORED_FIELD = function(LuceneFunctionNames.LUCENE_TEXT, field("text")); @@ -328,10 +325,6 @@ public static Index textAndStoredComplexIndex(final Consumer LuceneIndexTypes.LUCENE, ImmutableMap.of()); - private static final Index ANALYZER_CHOOSER_TEST_LUCENE_INDEX = new Index("analyzer_chooser_test_index", function(LuceneFunctionNames.LUCENE_TEXT, field("text")), LuceneIndexTypes.LUCENE, - ImmutableMap.of( - LuceneIndexOptions.LUCENE_ANALYZER_NAME_OPTION, TestAnalyzerFactory.ANALYZER_FACTORY_NAME)); - private static final Index 
AUTO_COMPLETE_SIMPLE_LUCENE_INDEX = new Index("Complex$multiple_analyzer_autocomplete", concat(function(LuceneFunctionNames.LUCENE_TEXT, field("text")), function(LuceneFunctionNames.LUCENE_TEXT, field("text2"))), LuceneIndexTypes.LUCENE, @@ -396,21 +389,6 @@ public static Index textAndStoredComplexIndex(final Consumer LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION, EnglishSynonymMapConfig.ExpandedEnglishSynonymMapConfig.CONFIG_NAME, LuceneIndexOptions.OPTIMIZED_STORED_FIELDS_FORMAT_ENABLED, "true")); - protected static final Index JOINED_COMPLEX_MULTIPLE_TEXT_ANALYZER_CHOOSER_INDEXES = - new Index("JoinedComplex$text_analyzerChooser_multipleIndexes", - concat( - field("complex").nest(function(LuceneFunctionNames.LUCENE_STORED, field("is_seen"))), - field("simple").nest(function(LuceneFunctionNames.LUCENE_TEXT, field("text"))), - field("complex").nest(function(LuceneFunctionNames.LUCENE_TEXT, field("text2"))), - field("complex").nest(function(LuceneFunctionNames.LUCENE_STORED, field("score"))), - field("complex").nest(function(LuceneFunctionNames.LUCENE_STORED, field("group"))), - field("complex").nest(function(LuceneFunctionNames.LUCENE_SORTED, field("timestamp"))) - ), LuceneIndexTypes.LUCENE, - ImmutableMap.of(IndexOptions.TEXT_TOKENIZER_NAME_OPTION, AllSuffixesTextTokenizer.NAME, - LuceneIndexOptions.TEXT_SYNONYM_SET_NAME_OPTION, EnglishSynonymMapConfig.ExpandedEnglishSynonymMapConfig.CONFIG_NAME, - LuceneIndexOptions.LUCENE_ANALYZER_NAME_OPTION, TestAnalyzerFactory.ANALYZER_FACTORY_NAME, - LuceneIndexOptions.OPTIMIZED_STORED_FIELDS_FORMAT_ENABLED, "true")); - protected static final Index JOINED_AUTHORITATIVE_SYNONYM_COMPLEX_MULTIPLE_TEXT_INDEXES = new Index("JoinedSynonymAuthoritativeComplex$text_multipleIndexes", concat( @@ -841,46 +819,6 @@ public static void rebalancePartitions(final FDBRecordStore recordStore, final I } } - /** - * A testing analyzer factory to verify the logic for {@link AnalyzerChooser}. 
- */ - @AutoService(LuceneAnalyzerFactory.class) - public static class TestAnalyzerFactory implements LuceneAnalyzerFactory { - protected static final String ANALYZER_FACTORY_NAME = "TEST_ANALYZER"; - - @Override - @Nonnull - public String getName() { - return ANALYZER_FACTORY_NAME; - } - - @Override - @Nonnull - public LuceneAnalyzerType getType() { - return LuceneAnalyzerType.FULL_TEXT; - } - - @Override - @Nonnull - public AnalyzerChooser getIndexAnalyzerChooser(@Nonnull Index index) { - return new TestAnalyzerChooser(); - } - } - - private static class TestAnalyzerChooser implements AnalyzerChooser { - @Override - @Nonnull - public LuceneAnalyzerWrapper chooseAnalyzer(@Nonnull List texts) { - if (texts.stream().anyMatch(t -> t.contains("synonym"))) { - return new LuceneAnalyzerWrapper("TEST_SYNONYM", - new SynonymAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, EnglishSynonymMapConfig.ExpandedEnglishSynonymMapConfig.CONFIG_NAME)); - } else { - return new LuceneAnalyzerWrapper("TEST_NGRAM", - new NgramAnalyzer(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, 3, 30, false)); - } - } - } - /** * Whether to test with synthetic record types or standard record types. 
*/ @@ -907,7 +845,6 @@ public enum IndexedType { Map.entry(SPELLCHECK_INDEX_COMPLEX_KEY, JOINED_COMPLEX_MULTIPLE_TEXT_INDEXES), Map.entry(COMPLEX_MULTIPLE_GROUPED_KEY, JOINED_COMPLEX_GROUPED_WITH_PRIMARY_KEY_SEGMENT_INDEX), Map.entry(COMPLEX_MULTI_GROUPED_WITH_AUTO_COMPLETE_KEY, JOINED_COMPLEX_GROUPED_WITH_PRIMARY_KEY_SEGMENT_INDEX), - Map.entry(ANALYZER_CHOOSER_TEST_LUCENE_INDEX_KEY, JOINED_COMPLEX_MULTIPLE_TEXT_ANALYZER_CHOOSER_INDEXES), Map.entry(AUTO_COMPLETE_SIMPLE_LUCENE_INDEX_KEY, JOINED_COMPLEX_MULTIPLE_TEXT_INDEXES)), true, List.of(1373414429, -542327065, 1373414429, -1751615347, -1644529491, -1644529491, 2019229269)), @@ -933,7 +870,6 @@ public enum IndexedType { Map.entry(SPELLCHECK_INDEX_COMPLEX_KEY, SPELLCHECK_INDEX_COMPLEX), Map.entry(COMPLEX_MULTIPLE_GROUPED_KEY, COMPLEX_MULTIPLE_GROUPED), Map.entry(COMPLEX_MULTI_GROUPED_WITH_AUTO_COMPLETE_KEY, COMPLEX_MULTI_GROUPED_WITH_AUTO_COMPLETE), - Map.entry(ANALYZER_CHOOSER_TEST_LUCENE_INDEX_KEY, ANALYZER_CHOOSER_TEST_LUCENE_INDEX), Map.entry(AUTO_COMPLETE_SIMPLE_LUCENE_INDEX_KEY, AUTO_COMPLETE_SIMPLE_LUCENE_INDEX)), false, List.of(1498044543, -417696951, -687982540, -1626985233, -1008465729, 1532371150, -42167700)); diff --git a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestValidator.java b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestValidator.java index d72aabb3c3..73e6be4852 100644 --- a/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestValidator.java +++ b/fdb-record-layer-lucene/src/test/java/com/apple/foundationdb/record/lucene/LuceneIndexTestValidator.java @@ -314,7 +314,7 @@ public static void validatePrimaryKeySegmentIndex(@Nonnull FDBRecordStore record .collect(Collectors.toList()), message); } - directoryManager.getIndexWriter(groupingKey, partitionId, LuceneAnalyzerWrapper.getStandardAnalyzerWrapper()); + directoryManager.getIndexWriter(groupingKey, partitionId); final 
DirectoryReader directoryReader = directoryManager.getDirectoryReader(groupingKey, partitionId); for (final Tuple primaryKey : expectedPrimaryKeys) { assertNotNull(primaryKeySegmentIndex.findDocument(directoryReader, primaryKey),