From 3f8a531143b2918aa7e93e30a1b875f9aa464e32 Mon Sep 17 00:00:00 2001 From: Chandrakant Vankayalapati <104664857+ceekay47@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:06:55 -0700 Subject: [PATCH] HDDS-10206. Expose jmx metrics for snapshot cache size on the ozone manager. (#6138) (cherry picked from commit 301664e46a546a8fabdf73f38c596da196674941) --- .../org/apache/hadoop/ozone/om/OMMetrics.java | 13 ++++++ .../hadoop/ozone/om/OmSnapshotManager.java | 2 +- .../ozone/om/snapshot/SnapshotCache.java | 11 ++++- .../ozone/om/snapshot/TestSnapshotCache.java | 42 ++++++++++++++++++- .../om/snapshot/TestSnapshotDiffManager.java | 2 +- 5 files changed, 65 insertions(+), 5 deletions(-) diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java index c499de2d176..1c0ec78cfb2 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java @@ -26,6 +26,7 @@ import org.apache.hadoop.metrics2.annotation.Metrics; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.metrics2.lib.MutableCounterLong; +import org.apache.hadoop.metrics2.lib.MutableGaugeInt; /** * This class is for maintaining Ozone Manager statistics. @@ -77,6 +78,7 @@ public class OMMetrics implements OmMetadataReaderMetrics { private @Metric MutableCounterLong numSnapshotPurges; private @Metric MutableCounterLong numSnapshotSetProperties; + private @Metric MutableGaugeInt numSnapshotCacheSize; private @Metric MutableCounterLong numGetFileStatus; private @Metric MutableCounterLong numCreateDirectory; private @Metric MutableCounterLong numCreateFile; @@ -551,6 +553,17 @@ public void decNumSnapshotDeleted() { numSnapshotDeleted.incr(-1); } + public int getNumSnapshotCacheSize() { + return numSnapshotCacheSize.value(); + } + public void incNumSnapshotCacheSize() { + numSnapshotCacheSize.incr(); + } + + public void decNumSnapshotCacheSize() { + numSnapshotCacheSize.decr(); + } + public void incNumCompleteMultipartUploadFails() { numCompleteMultipartUploadFails.incr(); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java index 602620743b0..a3799b389c5 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmSnapshotManager.java @@ -272,7 +272,7 @@ public OmSnapshotManager(OzoneManager ozoneManager) { }; // Init snapshot cache - this.snapshotCache = new SnapshotCache(loader, softCacheSize); + this.snapshotCache = new SnapshotCache(loader, softCacheSize, ozoneManager.getMetrics()); this.snapshotDiffManager = new SnapshotDiffManager(snapshotDiffDb, differ, ozoneManager, snapDiffJobCf, snapDiffReportCf, diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 0b64d6d069b..f14837462b0 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -19,6 +19,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.cache.CacheLoader; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.hadoop.ozone.om.exceptions.OMException; import org.slf4j.Logger; @@ -51,10 +52,13 @@ public class SnapshotCache { // opened on the OM. private final int cacheSizeLimit; - public SnapshotCache(CacheLoader cacheLoader, int cacheSizeLimit) { + private final OMMetrics omMetrics; + + public SnapshotCache(CacheLoader cacheLoader, int cacheSizeLimit, OMMetrics omMetrics) { this.dbMap = new ConcurrentHashMap<>(); this.cacheLoader = cacheLoader; this.cacheSizeLimit = cacheSizeLimit; + this.omMetrics = omMetrics; } @VisibleForTesting @@ -83,6 +87,7 @@ public void invalidate(UUID key) throws IOException { } catch (IOException e) { throw new IllegalStateException("Failed to close snapshotId: " + key, e); } + omMetrics.decNumSnapshotCacheSize(); } return null; }); @@ -104,6 +109,7 @@ public void invalidateAll() { throw new IllegalStateException("Failed to close snapshot", e); } it.remove(); + omMetrics.decNumSnapshotCacheSize(); } } @@ -150,6 +156,7 @@ public ReferenceCounted get(UUID key) throws IOException { // Unexpected and unknown exception thrown from CacheLoader#load throw new IllegalStateException(ex); } + omMetrics.incNumSnapshotCacheSize(); } if (v != null) { // When RC OmSnapshot is successfully loaded @@ -157,7 +164,6 @@ public ReferenceCounted get(UUID key) throws IOException { } return v; }); - if (rcOmSnapshot == null) { // The only exception that would fall through the loader logic above // is OMException with FILE_NOT_FOUND. @@ -227,6 +233,7 @@ private void cleanupInternal() { } catch (IOException ex) { throw new IllegalStateException("Error while closing snapshot DB.", ex); } + omMetrics.decNumSnapshotCacheSize(); return null; } }); diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java index 21b795216de..2a1e2ec99fc 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotCache.java @@ -18,6 +18,7 @@ package org.apache.hadoop.ozone.om.snapshot; import com.google.common.cache.CacheLoader; +import org.apache.hadoop.ozone.om.OMMetrics; import org.apache.hadoop.ozone.om.OmSnapshot; import org.apache.ozone.test.GenericTestUtils; import org.junit.jupiter.api.AfterEach; @@ -52,6 +53,8 @@ class TestSnapshotCache { private static CacheLoader cacheLoader; private SnapshotCache snapshotCache; + private OMMetrics omMetrics; + @BeforeAll static void beforeAll() throws Exception { cacheLoader = mock(CacheLoader.class); @@ -74,7 +77,8 @@ static void beforeAll() throws Exception { @BeforeEach void setUp() { // Reset cache for each test case - snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT); + omMetrics = OMMetrics.create(); + snapshotCache = new SnapshotCache(cacheLoader, CACHE_SIZE_LIMIT, omMetrics); } @AfterEach @@ -87,11 +91,13 @@ void tearDown() { @DisplayName("get()") void testGet() throws IOException { final UUID dbKey1 = UUID.randomUUID(); + assertEquals(0, omMetrics.getNumSnapshotCacheSize()); ReferenceCounted omSnapshot = snapshotCache.get(dbKey1); assertNotNull(omSnapshot); assertNotNull(omSnapshot.get()); assertInstanceOf(OmSnapshot.class, omSnapshot.get()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } @Test @@ -101,12 +107,14 @@ void testGetTwice() throws IOException { ReferenceCounted omSnapshot1 = snapshotCache.get(dbKey1); assertNotNull(omSnapshot1); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); ReferenceCounted omSnapshot1again = snapshotCache.get(dbKey1); // Should be the same instance assertEquals(omSnapshot1, omSnapshot1again); assertEquals(omSnapshot1.get(), omSnapshot1again.get()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } @Test @@ -117,10 +125,12 @@ void testReleaseByDbKey() throws IOException { assertNotNull(omSnapshot1); assertNotNull(omSnapshot1.get()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } @Test @@ -130,13 +140,16 @@ void testInvalidate() throws IOException { ReferenceCounted omSnapshot = snapshotCache.get(dbKey1); assertNotNull(omSnapshot); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); snapshotCache.invalidate(dbKey1); assertEquals(0, snapshotCache.size()); + assertEquals(0, omMetrics.getNumSnapshotCacheSize()); } @Test @@ -146,11 +159,13 @@ void testInvalidateAll() throws IOException { ReferenceCounted omSnapshot1 = snapshotCache.get(dbKey1); assertNotNull(omSnapshot1); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey2 = UUID.randomUUID(); ReferenceCounted omSnapshot2 = snapshotCache.get(dbKey2); assertNotNull(omSnapshot2); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); // Should be difference omSnapshot instances assertNotEquals(omSnapshot1, omSnapshot2); @@ -158,16 +173,20 @@ void testInvalidateAll() throws IOException { ReferenceCounted omSnapshot3 = snapshotCache.get(dbKey3); assertNotNull(omSnapshot3); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey1); // Entry will not be immediately evicted assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); snapshotCache.invalidate(dbKey1); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); snapshotCache.invalidateAll(); assertEquals(0, snapshotCache.size()); + assertEquals(0, omMetrics.getNumSnapshotCacheSize()); } private void assertEntryExistence(UUID key, boolean shouldExist) { @@ -191,26 +210,33 @@ void testEviction1() throws IOException { final UUID dbKey1 = UUID.randomUUID(); snapshotCache.get(dbKey1); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey1); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey2 = UUID.randomUUID(); snapshotCache.get(dbKey2); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey2); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey3 = UUID.randomUUID(); snapshotCache.get(dbKey3); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); snapshotCache.release(dbKey3); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey4 = UUID.randomUUID(); snapshotCache.get(dbKey4); // dbKey1, dbKey2 and dbKey3 would have been evicted by the end of the last get() because // those were release()d. assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); assertEntryExistence(dbKey1, false); } @@ -221,25 +247,30 @@ void testEviction2() throws IOException { final UUID dbKey1 = UUID.randomUUID(); snapshotCache.get(dbKey1); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey2 = UUID.randomUUID(); snapshotCache.get(dbKey2); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey3 = UUID.randomUUID(); snapshotCache.get(dbKey3); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey4 = UUID.randomUUID(); snapshotCache.get(dbKey4); // dbKey1 would not have been evicted because it is not release()d assertEquals(4, snapshotCache.size()); + assertEquals(4, omMetrics.getNumSnapshotCacheSize()); assertEntryExistence(dbKey1, true); // Releasing dbKey2 at this point should immediately trigger its eviction // because the cache size exceeded the soft limit snapshotCache.release(dbKey2); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); assertEntryExistence(dbKey2, false); assertEntryExistence(dbKey1, true); } @@ -252,41 +283,50 @@ void testEviction3WithClose() throws IOException { try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey1)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } // ref count should have been decreased because it would be close()d // upon exiting try-with-resources. assertEquals(0L, snapshotCache.getDbMap().get(dbKey1).getTotalRefCount()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey2 = UUID.randomUUID(); try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey2)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); // Get dbKey2 entry a second time try (ReferenceCounted rcOmSnapshot2 = snapshotCache.get(dbKey2)) { assertEquals(2L, rcOmSnapshot.getTotalRefCount()); assertEquals(2L, rcOmSnapshot2.getTotalRefCount()); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); } assertEquals(1L, rcOmSnapshot.getTotalRefCount()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey2).getTotalRefCount()); assertEquals(2, snapshotCache.size()); + assertEquals(2, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey3 = UUID.randomUUID(); try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey3)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey3).getTotalRefCount()); assertEquals(3, snapshotCache.size()); + assertEquals(3, omMetrics.getNumSnapshotCacheSize()); final UUID dbKey4 = UUID.randomUUID(); try (ReferenceCounted rcOmSnapshot = snapshotCache.get(dbKey4)) { assertEquals(1L, rcOmSnapshot.getTotalRefCount()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } assertEquals(0L, snapshotCache.getDbMap().get(dbKey4).getTotalRefCount()); assertEquals(1, snapshotCache.size()); + assertEquals(1, omMetrics.getNumSnapshotCacheSize()); } } diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java index c0a85c787d5..a9e67b00cc9 100644 --- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java +++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotDiffManager.java @@ -364,7 +364,7 @@ public void init() throws RocksDBException, IOException, ExecutionException { omSnapshotManager = mock(OmSnapshotManager.class); when(ozoneManager.getOmSnapshotManager()).thenReturn(omSnapshotManager); when(omSnapshotManager.isSnapshotStatus(any(), any())).thenReturn(true); - SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10); + SnapshotCache snapshotCache = new SnapshotCache(mockCacheLoader(), 10, omMetrics); when(omSnapshotManager.getActiveSnapshot(anyString(), anyString(), anyString())) .thenAnswer(invocationOnMock -> {