From db0ef4ab8d3723c574b29f382919d4b0b6d571ee Mon Sep 17 00:00:00 2001 From: arafat Date: Tue, 13 Aug 2024 23:34:01 +0530 Subject: [PATCH 1/5] HDDS-11309. Increase CONTAINER_STATE Column Length in UNHEALTHY_CONTAINERS Table to Avoid Truncation Errors for Enum States. --- .../schema/ContainerSchemaDefinition.java | 2 +- .../recon/fsck/TestContainerHealthTask.java | 65 ++++++++++++++++--- 2 files changed, 56 insertions(+), 11 deletions(-) diff --git a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java index 4d62ca886cd..04c6969a52f 100644 --- a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java +++ b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java @@ -80,7 +80,7 @@ public void initializeSchema() throws SQLException { private void createUnhealthyContainersTable() { dslContext.createTableIfNotExists(UNHEALTHY_CONTAINERS_TABLE_NAME) .column(CONTAINER_ID, SQLDataType.BIGINT.nullable(false)) - .column(CONTAINER_STATE, SQLDataType.VARCHAR(16).nullable(false)) + .column(CONTAINER_STATE, SQLDataType.VARCHAR(40).nullable(false)) .column("in_state_since", SQLDataType.BIGINT.nullable(false)) .column("expected_replica_count", SQLDataType.INTEGER.nullable(false)) .column("actual_replica_count", SQLDataType.INTEGER.nullable(false)) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index 8647639dd13..096c847489e 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -21,8 +21,7 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.assertj.core.api.Assertions.assertThat; import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_UNHEALTHY; -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.mock; @@ -30,13 +29,7 @@ import java.io.IOException; import java.time.Duration; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; +import java.util.*; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicatedReplicationConfig; @@ -67,12 +60,15 @@ import org.hadoop.ozone.recon.schema.tables.pojos.ReconTaskStatus; import org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers; import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Class to test a single run of the Container Health Task. */ public class TestContainerHealthTask extends AbstractReconSqlDBTest { - + private static final Logger LOG = + LoggerFactory.getLogger(TestContainerHealthTask.class); public TestContainerHealthTask() { super(); } @@ -384,6 +380,55 @@ public void testDeletedContainer() throws Exception { .isGreaterThan(currentTime); } + @Test + public void testAllContainerStateInsertions() { + // Set up DAOs and Schema Manager + UnhealthyContainersDao unHealthyContainersTableHandle = + getDao(UnhealthyContainersDao.class); + + ContainerHealthSchemaManager containerHealthSchemaManager = + new ContainerHealthSchemaManager( + getSchemaDefinition(ContainerSchemaDefinition.class), + unHealthyContainersTableHandle); + + // Iterate through each state in the UnHealthyContainerStates enum + for (ContainerSchemaDefinition.UnHealthyContainerStates state : + ContainerSchemaDefinition.UnHealthyContainerStates.values()) { + // Create a dummy UnhealthyContainer record with the current state + UnhealthyContainers unhealthyContainer = new UnhealthyContainers(); + unhealthyContainer.setContainerId(state.ordinal() + 1L); // Ensure unique ID for each state + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(1); + unhealthyContainer.setReplicaDelta(2); + unhealthyContainer.setContainerState(state.name()); + unhealthyContainer.setInStateSince(System.currentTimeMillis()); + + // Try inserting the record and catch any exception that occurs + Exception exception = null; + try { + containerHealthSchemaManager.insertUnhealthyContainerRecords( + Collections.singletonList(unhealthyContainer)); + } catch (Exception e) { + exception = e; + } + + // Assert no exception should be thrown for each state + assertNull(exception, + "Exception was thrown during insertion for state " + state.name() + + ": " + exception); + + // Optionally, verify the record was inserted correctly + List insertedRecords = + unHealthyContainersTableHandle.fetchByContainerId( + state.ordinal() + 1L); + assertFalse(insertedRecords.isEmpty(), + "Record was not inserted for state " + state.name() + "."); + assertEquals(insertedRecords.get(0).getContainerState(), state.name(), + "The inserted container state does not match for state " + + state.name() + "."); + } + } + @Test public void testNegativeSizeContainers() throws Exception { // Setup mock objects and test environment From 61dba702e708c327722c8e42b82379e015178a96 Mon Sep 17 00:00:00 2001 From: arafat Date: Tue, 13 Aug 2024 23:54:50 +0530 Subject: [PATCH 2/5] Fixed a few styling issues --- .../recon/fsck/TestContainerHealthTask.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index 096c847489e..2d9e20a0558 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -21,7 +21,9 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.assertj.core.api.Assertions.assertThat; import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_UNHEALTHY; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.mock; @@ -29,7 +31,13 @@ import java.io.IOException; import java.time.Duration; -import java.util.*; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; import org.apache.hadoop.hdds.client.RatisReplicationConfig; import org.apache.hadoop.hdds.client.ReplicatedReplicationConfig; @@ -60,15 +68,12 @@ import org.hadoop.ozone.recon.schema.tables.pojos.ReconTaskStatus; import org.hadoop.ozone.recon.schema.tables.pojos.UnhealthyContainers; import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * Class to test a single run of the Container Health Task. */ public class TestContainerHealthTask extends AbstractReconSqlDBTest { - private static final Logger LOG = - LoggerFactory.getLogger(TestContainerHealthTask.class); + public TestContainerHealthTask() { super(); } From da0bb064984e5a550fead1d31f5d5359242e8b73 Mon Sep 17 00:00:00 2001 From: arafat Date: Wed, 14 Aug 2024 19:39:50 +0530 Subject: [PATCH 3/5] Reverted the length back to 16 and instead changed the name of the enum entity --- .../ozone/recon/schema/ContainerSchemaDefinition.java | 4 ++-- .../recon/persistence/ContainerHealthSchemaManager.java | 4 ++-- .../hadoop/ozone/recon/fsck/TestContainerHealthTask.java | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java index 04c6969a52f..7c293ff1861 100644 --- a/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java +++ b/hadoop-ozone/recon-codegen/src/main/java/org/hadoop/ozone/recon/schema/ContainerSchemaDefinition.java @@ -51,7 +51,7 @@ public enum UnHealthyContainerStates { UNDER_REPLICATED, OVER_REPLICATED, MIS_REPLICATED, - ALL_REPLICAS_UNHEALTHY, + ALL_REPLICAS_BAD, NEGATIVE_SIZE // Added new state to track containers with negative sizes } @@ -80,7 +80,7 @@ public void initializeSchema() throws SQLException { private void createUnhealthyContainersTable() { dslContext.createTableIfNotExists(UNHEALTHY_CONTAINERS_TABLE_NAME) .column(CONTAINER_ID, SQLDataType.BIGINT.nullable(false)) - .column(CONTAINER_STATE, SQLDataType.VARCHAR(40).nullable(false)) + .column(CONTAINER_STATE, SQLDataType.VARCHAR(16).nullable(false)) .column("in_state_since", SQLDataType.BIGINT.nullable(false)) .column("expected_replica_count", SQLDataType.INTEGER.nullable(false)) .column("actual_replica_count", SQLDataType.INTEGER.nullable(false)) diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java index 0c13376fa52..9ccc09d8d03 100644 --- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java +++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/persistence/ContainerHealthSchemaManager.java @@ -18,7 +18,7 @@ package org.apache.hadoop.ozone.recon.persistence; import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.UNDER_REPLICATED; -import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_UNHEALTHY; +import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_BAD; import static org.hadoop.ozone.recon.schema.tables.UnhealthyContainersTable.UNHEALTHY_CONTAINERS; import static org.jooq.impl.DSL.count; @@ -76,7 +76,7 @@ public List getUnhealthyContainers( SelectQuery query = dslContext.selectQuery(); query.addFrom(UNHEALTHY_CONTAINERS); if (state != null) { - if (state.equals(ALL_REPLICAS_UNHEALTHY)) { + if (state.equals(ALL_REPLICAS_BAD)) { query.addConditions(UNHEALTHY_CONTAINERS.CONTAINER_STATE .eq(UNDER_REPLICATED.toString())); query.addConditions(UNHEALTHY_CONTAINERS.ACTUAL_REPLICA_COUNT.eq(0)); diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index 2d9e20a0558..30d9392ffaa 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -20,10 +20,11 @@ import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE; import static org.assertj.core.api.Assertions.assertThat; -import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_UNHEALTHY; +import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_BAD; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.mock; @@ -200,7 +201,7 @@ public void testRun() throws Exception { List unhealthyContainers = containerHealthSchemaManager.getUnhealthyContainers( - ALL_REPLICAS_UNHEALTHY, 0, Integer.MAX_VALUE); + ALL_REPLICAS_BAD, 0, Integer.MAX_VALUE); assertEquals(1, unhealthyContainers.size()); assertEquals(2L, unhealthyContainers.get(0).getContainerId().longValue()); @@ -387,7 +388,6 @@ public void testDeletedContainer() throws Exception { @Test public void testAllContainerStateInsertions() { - // Set up DAOs and Schema Manager UnhealthyContainersDao unHealthyContainersTableHandle = getDao(UnhealthyContainersDao.class); From 6ee44c04f6b6183247cf62527813a96a1f2a0a86 Mon Sep 17 00:00:00 2001 From: arafat Date: Tue, 20 Aug 2024 11:55:17 +0530 Subject: [PATCH 4/5] Improved the test method --- .../recon/fsck/TestContainerHealthTask.java | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index 30d9392ffaa..c20492db001 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -401,10 +401,48 @@ public void testAllContainerStateInsertions() { ContainerSchemaDefinition.UnHealthyContainerStates.values()) { // Create a dummy UnhealthyContainer record with the current state UnhealthyContainers unhealthyContainer = new UnhealthyContainers(); - unhealthyContainer.setContainerId(state.ordinal() + 1L); // Ensure unique ID for each state - unhealthyContainer.setExpectedReplicaCount(3); - unhealthyContainer.setActualReplicaCount(1); - unhealthyContainer.setReplicaDelta(2); + unhealthyContainer.setContainerId(state.ordinal() + 1L); + + // Set replica counts based on the state + switch (state) { + case MISSING: + case EMPTY_MISSING: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(0); + unhealthyContainer.setReplicaDelta(3); + break; + + case UNDER_REPLICATED: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(1); + unhealthyContainer.setReplicaDelta(2); + break; + + case OVER_REPLICATED: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(4); + unhealthyContainer.setReplicaDelta(-1); + break; + + case MIS_REPLICATED: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(3); + unhealthyContainer.setReplicaDelta(0); + break; + + case ALL_REPLICAS_BAD: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(0); + unhealthyContainer.setReplicaDelta(3); + break; + + case NEGATIVE_SIZE: + unhealthyContainer.setExpectedReplicaCount(3); + unhealthyContainer.setActualReplicaCount(3); + unhealthyContainer.setReplicaDelta(0); + break; + } + unhealthyContainer.setContainerState(state.name()); unhealthyContainer.setInStateSince(System.currentTimeMillis()); From 5a0207db7f9571a2739781042337d9bcb53f47bd Mon Sep 17 00:00:00 2001 From: arafat Date: Tue, 20 Aug 2024 12:28:04 +0530 Subject: [PATCH 5/5] Added a default case and also merged few cases together --- .../ozone/recon/fsck/TestContainerHealthTask.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java index c20492db001..ae46bd8b5b5 100644 --- a/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java +++ b/hadoop-ozone/recon/src/test/java/org/apache/hadoop/ozone/recon/fsck/TestContainerHealthTask.java @@ -22,9 +22,10 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.hadoop.ozone.recon.schema.ContainerSchemaDefinition.UnHealthyContainerStates.ALL_REPLICAS_BAD; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyInt; import static org.mockito.Mockito.mock; @@ -399,6 +400,7 @@ public void testAllContainerStateInsertions() { // Iterate through each state in the UnHealthyContainerStates enum for (ContainerSchemaDefinition.UnHealthyContainerStates state : ContainerSchemaDefinition.UnHealthyContainerStates.values()) { + // Create a dummy UnhealthyContainer record with the current state UnhealthyContainers unhealthyContainer = new UnhealthyContainers(); unhealthyContainer.setContainerId(state.ordinal() + 1L); @@ -425,6 +427,7 @@ public void testAllContainerStateInsertions() { break; case MIS_REPLICATED: + case NEGATIVE_SIZE: unhealthyContainer.setExpectedReplicaCount(3); unhealthyContainer.setActualReplicaCount(3); unhealthyContainer.setReplicaDelta(0); @@ -436,11 +439,8 @@ public void testAllContainerStateInsertions() { unhealthyContainer.setReplicaDelta(3); break; - case NEGATIVE_SIZE: - unhealthyContainer.setExpectedReplicaCount(3); - unhealthyContainer.setActualReplicaCount(3); - unhealthyContainer.setReplicaDelta(0); - break; + default: + fail("Unhandled state: " + state.name() + ". Please add this state to the switch case."); } unhealthyContainer.setContainerState(state.name());