diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
index 75d7081e7729a..22dc0846e610b 100644
--- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
+++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java
@@ -179,6 +179,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
IndexModule.INDEX_STORE_TYPE_SETTING,
IndexModule.INDEX_STORE_PRE_LOAD_SETTING,
+ IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS,
IndexModule.INDEX_RECOVERY_TYPE_SETTING,
IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING,
FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING,
diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java
index 46c2b91b4b99a..f8604caeab414 100644
--- a/server/src/main/java/org/opensearch/index/IndexModule.java
+++ b/server/src/main/java/org/opensearch/index/IndexModule.java
@@ -148,6 +148,34 @@ public final class IndexModule {
Property.NodeScope
);
+ /**
+ * Which lucene file extensions to load with the mmap directory when using hybridfs store.
+ * This is an expert setting.
+ * <p>
+ * Rationale for the default extensions:
+ * <ul>
+ * <li>{@code nvd}, {@code dvd}, {@code tim}: norms, doc values and term dictionaries are typically
+ * performance-sensitive and hot in the page cache, so mmap provides better performance.</li>
+ * <li>{@code tip}, {@code kdd}, {@code kdi}: the terms index and KD-tree index are opened off-heap to save
+ * memory, which only performs well when using mmap.</li>
+ * <li>{@code dim}: only applies to indices written by Lucene 8.x or earlier.</li>
+ * <li>{@code cfs}: compound files store all the information for the segment; benchmarks suggested that
+ * not mapping them hurts performance.</li>
+ * <li>{@code doc}: MMapDirectory has special logic to read {@code long[]} arrays in little-endian order that
+ * speeds up the decoding of postings. Positions ({@code pos}) and offsets ({@code pay}) are not part of the
+ * defaults because queries that leverage positions are more costly and postings decoding is less of a bottleneck.</li>
+ * </ul>
+ *
+ * @see <a href="https://lucene.apache.org/core/9_5_0/core/org/apache/lucene/codecs/lucene95/package-summary.html#file-names">Lucene File Extensions</a>
+ */
+ public static final Setting<List<String>> INDEX_STORE_HYBRID_MMAP_EXTENSIONS = Setting.listSetting(
+ "index.store.hybrid.mmap.extensions",
+ List.of("nvd", "dvd", "tim", "tip", "dim", "kdd", "kdi", "cfs", "doc"),
+ Function.identity(),
+ Property.IndexScope,
+ Property.NodeScope
+ );
+
public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity";
// whether to use the query cache
diff --git a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java
index 57cc9f09ac37d..88c7e12632863 100644
--- a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java
+++ b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java
@@ -97,9 +97,10 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index
case HYBRIDFS:
// Use Lucene defaults
final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory);
+ final Set mmapExtensions = new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS));
if (primaryDirectory instanceof MMapDirectory) {
MMapDirectory mMapDirectory = (MMapDirectory) primaryDirectory;
- return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions));
+ return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions), mmapExtensions);
} else {
return primaryDirectory;
}
@@ -142,10 +143,12 @@ public static boolean isHybridFs(Directory directory) {
*/
static final class HybridDirectory extends NIOFSDirectory {
private final MMapDirectory delegate;
+ private final Set<String> mmapExtensions; // extensions for which useDelegate(String) returns true
- HybridDirectory(LockFactory lockFactory, MMapDirectory delegate) throws IOException {
+ HybridDirectory(LockFactory lockFactory, MMapDirectory delegate, Set<String> mmapExtensions) throws IOException {
super(delegate.getDirectory(), lockFactory);
this.delegate = delegate;
+ this.mmapExtensions = mmapExtensions; // stored by reference, not copied
}
@Override
@@ -164,43 +167,16 @@ public IndexInput openInput(String name, IOContext context) throws IOException {
}
}
+ // Returns true when the extension of the given file name is one of the configured mmap extensions.
+ boolean useDelegate(String name) {
+ return mmapExtensions.contains(FileSwitchDirectory.getExtension(name));
+ }
+
@Override
public void close() throws IOException {
IOUtils.close(super::close, delegate);
}
- boolean useDelegate(String name) {
- String extension = FileSwitchDirectory.getExtension(name);
- switch (extension) {
- // Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page
- // cache, so we use mmap, which provides better performance.
- case "nvd":
- case "dvd":
- case "tim":
- // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs
- // well if using mmap.
- case "tip":
- // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10
- case "dim":
- case "kdd":
- case "kdi":
- // Compound files are tricky because they store all the information for the segment. Benchmarks
- // suggested that not mapping them hurts performance.
- case "cfs":
- // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed
- // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we
- // are not mmaping them as queries that leverage positions are more costly and the decoding of postings
- // tends to be less a bottleneck.
- case "doc":
- return true;
- // Other files are either less performance-sensitive (e.g. stored field index, norms metadata)
- // or are large and have a random access pattern and mmap leads to page cache trashing
- // (e.g. stored fields and term vectors).
- default:
- return false;
- }
- }
-
MMapDirectory getDelegate() {
return delegate;
}
diff --git a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java
index 70d4ae790f146..cf8d6677b4227 100644
--- a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java
+++ b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java
@@ -70,20 +70,52 @@ public void testPreload() throws IOException {
try (Directory directory = newDirectory(build)) {
assertTrue(FsDirectoryFactory.isHybridFs(directory));
FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory;
- assertTrue(hybridDirectory.useDelegate("foo.dvd"));
+ // test default hybrid mmap extensions
assertTrue(hybridDirectory.useDelegate("foo.nvd"));
+ assertTrue(hybridDirectory.useDelegate("foo.dvd"));
assertTrue(hybridDirectory.useDelegate("foo.tim"));
assertTrue(hybridDirectory.useDelegate("foo.tip"));
- assertTrue(hybridDirectory.useDelegate("foo.cfs"));
assertTrue(hybridDirectory.useDelegate("foo.dim"));
assertTrue(hybridDirectory.useDelegate("foo.kdd"));
assertTrue(hybridDirectory.useDelegate("foo.kdi"));
- assertFalse(hybridDirectory.useDelegate("foo.bar"));
+ assertTrue(hybridDirectory.useDelegate("foo.cfs"));
+ assertTrue(hybridDirectory.useDelegate("foo.doc"));
+ assertFalse(hybridDirectory.useDelegate("foo.pos"));
+ assertFalse(hybridDirectory.useDelegate("foo.pay"));
MMapDirectory delegate = hybridDirectory.getDelegate();
assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class));
FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;
assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd"));
assertTrue(preLoadMMapDirectory.useDelegate("foo.bar"));
+ assertFalse(preLoadMMapDirectory.useDelegate("foo.cfs"));
+ }
+ build = Settings.builder()
+ .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT))
+ .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "nvd", "dvd", "cfs")
+ .putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), "nvd", "dvd", "tim", "pos", "pay")
+ .build();
+ try (Directory directory = newDirectory(build)) {
+ assertTrue(FsDirectoryFactory.isHybridFs(directory));
+ FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory;
+ // test custom hybrid mmap extensions
+ assertTrue(hybridDirectory.useDelegate("foo.nvd"));
+ assertTrue(hybridDirectory.useDelegate("foo.dvd"));
+ assertTrue(hybridDirectory.useDelegate("foo.tim"));
+ assertFalse(hybridDirectory.useDelegate("foo.tip"));
+ assertFalse(hybridDirectory.useDelegate("foo.dim"));
+ assertFalse(hybridDirectory.useDelegate("foo.kdd"));
+ assertFalse(hybridDirectory.useDelegate("foo.kdi"));
+ assertFalse(hybridDirectory.useDelegate("foo.cfs"));
+ assertFalse(hybridDirectory.useDelegate("foo.doc"));
+ assertTrue(hybridDirectory.useDelegate("foo.pos"));
+ assertTrue(hybridDirectory.useDelegate("foo.pay"));
+ MMapDirectory delegate = hybridDirectory.getDelegate();
+ assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class));
+ FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate;
+ assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd"));
+ assertFalse(preLoadMMapDirectory.useDelegate("foo.bar"));
+ assertTrue(preLoadMMapDirectory.useDelegate("foo.cfs"));
+ assertTrue(preLoadMMapDirectory.useDelegate("foo.nvd"));
}
}