diff --git a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java index 75d7081e7729a..22dc0846e610b 100644 --- a/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/IndexScopedSettings.java @@ -179,6 +179,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, IndexModule.INDEX_STORE_TYPE_SETTING, IndexModule.INDEX_STORE_PRE_LOAD_SETTING, + IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS, IndexModule.INDEX_RECOVERY_TYPE_SETTING, IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING, FsDirectoryFactory.INDEX_LOCK_FACTOR_SETTING, diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 46c2b91b4b99a..f8604caeab414 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -148,6 +148,18 @@ public final class IndexModule { Property.NodeScope ); + /** Which lucene file extensions to load with the mmap directory when using hybridfs store. + * This is an expert setting. + * @see Lucene File Extensions. + */ + public static final Setting> INDEX_STORE_HYBRID_MMAP_EXTENSIONS = Setting.listSetting( + "index.store.hybrid.mmap.extensions", + List.of("nvd", "dvd", "tim", "tip", "dim", "kdd", "kdi", "cfs", "doc"), + Function.identity(), + Property.IndexScope, + Property.NodeScope + ); + public static final String SIMILARITY_SETTINGS_PREFIX = "index.similarity"; // whether to use the query cache diff --git a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java index 57cc9f09ac37d..88c7e12632863 100644 --- a/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/opensearch/index/store/FsDirectoryFactory.java @@ -97,9 +97,10 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index case HYBRIDFS: // Use Lucene defaults final FSDirectory primaryDirectory = FSDirectory.open(location, lockFactory); + final Set mmapExtensions = new HashSet<>(indexSettings.getValue(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS)); if (primaryDirectory instanceof MMapDirectory) { MMapDirectory mMapDirectory = (MMapDirectory) primaryDirectory; - return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions)); + return new HybridDirectory(lockFactory, setPreload(mMapDirectory, lockFactory, preLoadExtensions), mmapExtensions); } else { return primaryDirectory; } @@ -142,10 +143,12 @@ public static boolean isHybridFs(Directory directory) { */ static final class HybridDirectory extends NIOFSDirectory { private final MMapDirectory delegate; + private final Set mmapExtensions; - HybridDirectory(LockFactory lockFactory, MMapDirectory delegate) throws IOException { + HybridDirectory(LockFactory lockFactory, MMapDirectory delegate, Set mmapExtensions) throws IOException { super(delegate.getDirectory(), lockFactory); this.delegate = delegate; + this.mmapExtensions = mmapExtensions; } @Override @@ -164,43 +167,16 @@ public IndexInput openInput(String name, IOContext context) throws IOException { } } + boolean useDelegate(String name) { + final String extension = FileSwitchDirectory.getExtension(name); + return mmapExtensions.contains(extension); + } + @Override public void close() throws IOException { IOUtils.close(super::close, delegate); } - boolean useDelegate(String name) { - String extension = FileSwitchDirectory.getExtension(name); - switch (extension) { - // Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page - // cache, so we use mmap, which provides better performance. - case "nvd": - case "dvd": - case "tim": - // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs - // well if using mmap. - case "tip": - // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 - case "dim": - case "kdd": - case "kdi": - // Compound files are tricky because they store all the information for the segment. Benchmarks - // suggested that not mapping them hurts performance. - case "cfs": - // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed - // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we - // are not mmaping them as queries that leverage positions are more costly and the decoding of postings - // tends to be less a bottleneck. - case "doc": - return true; - // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) - // or are large and have a random access pattern and mmap leads to page cache trashing - // (e.g. stored fields and term vectors). - default: - return false; - } - } - MMapDirectory getDelegate() { return delegate; } diff --git a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java index 70d4ae790f146..cf8d6677b4227 100644 --- a/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/opensearch/index/store/FsDirectoryFactoryTests.java @@ -70,20 +70,52 @@ public void testPreload() throws IOException { try (Directory directory = newDirectory(build)) { assertTrue(FsDirectoryFactory.isHybridFs(directory)); FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory; - assertTrue(hybridDirectory.useDelegate("foo.dvd")); + // test default hybrid mmap extensions assertTrue(hybridDirectory.useDelegate("foo.nvd")); + assertTrue(hybridDirectory.useDelegate("foo.dvd")); assertTrue(hybridDirectory.useDelegate("foo.tim")); assertTrue(hybridDirectory.useDelegate("foo.tip")); - assertTrue(hybridDirectory.useDelegate("foo.cfs")); assertTrue(hybridDirectory.useDelegate("foo.dim")); assertTrue(hybridDirectory.useDelegate("foo.kdd")); assertTrue(hybridDirectory.useDelegate("foo.kdi")); - assertFalse(hybridDirectory.useDelegate("foo.bar")); + assertTrue(hybridDirectory.useDelegate("foo.cfs")); + assertTrue(hybridDirectory.useDelegate("foo.doc")); + assertFalse(hybridDirectory.useDelegate("foo.pos")); + assertFalse(hybridDirectory.useDelegate("foo.pay")); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); assertTrue(preLoadMMapDirectory.useDelegate("foo.bar")); + assertFalse(preLoadMMapDirectory.useDelegate("foo.cfs")); + } + build = Settings.builder() + .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT)) + .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "nvd", "dvd", "cfs") + .putList(IndexModule.INDEX_STORE_HYBRID_MMAP_EXTENSIONS.getKey(), "nvd", "dvd", "tim", "pos", "pay") + .build(); + try (Directory directory = newDirectory(build)) { + assertTrue(FsDirectoryFactory.isHybridFs(directory)); + FsDirectoryFactory.HybridDirectory hybridDirectory = (FsDirectoryFactory.HybridDirectory) directory; + // test custom hybrid mmap extensions + assertTrue(hybridDirectory.useDelegate("foo.nvd")); + assertTrue(hybridDirectory.useDelegate("foo.dvd")); + assertTrue(hybridDirectory.useDelegate("foo.tim")); + assertFalse(hybridDirectory.useDelegate("foo.tip")); + assertFalse(hybridDirectory.useDelegate("foo.dim")); + assertFalse(hybridDirectory.useDelegate("foo.kdd")); + assertFalse(hybridDirectory.useDelegate("foo.kdi")); + assertFalse(hybridDirectory.useDelegate("foo.cfs")); + assertFalse(hybridDirectory.useDelegate("foo.doc")); + assertTrue(hybridDirectory.useDelegate("foo.pos")); + assertTrue(hybridDirectory.useDelegate("foo.pay")); + MMapDirectory delegate = hybridDirectory.getDelegate(); + assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); + FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; + assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); + assertFalse(preLoadMMapDirectory.useDelegate("foo.bar")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.cfs")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.nvd")); } }