Skip to content

Commit

Permalink
HDDS-11694. Safemode Improvement: Introduce factory class to create s…
Browse files Browse the repository at this point in the history
…afemode rules. (apache#7433)
  • Loading branch information
nandakumar131 authored Dec 9, 2024
1 parent a46153d commit befd64e
Show file tree
Hide file tree
Showing 8 changed files with 290 additions and 47 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ public class ContainerSafeModeRule extends
private AtomicLong ecContainerWithMinReplicas = new AtomicLong(0);
private final ContainerManager containerManager;

/**
 * Creates the rule, taking the list of containers from the given
 * {@link ContainerManager} instead of requiring the caller to supply it.
 *
 * <p>Delegates to the overload that accepts an explicit container list,
 * passing {@code containerManager.getContainers()} as that list. All other
 * arguments are forwarded unchanged.
 *
 * @param ruleName name of this rule, forwarded to the delegated constructor
 * @param eventQueue event queue, forwarded to the delegated constructor
 * @param conf configuration source, forwarded to the delegated constructor
 * @param containerManager container manager; it is dereferenced here to
 *     obtain the containers, so it must not be {@code null}
 * @param manager safe mode manager, forwarded to the delegated constructor
 */
public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
ConfigurationSource conf,
ContainerManager containerManager, SCMSafeModeManager manager) {
this(ruleName, eventQueue, conf, containerManager.getContainers(), containerManager, manager);
}

public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
ConfigurationSource conf,
List<ContainerInfo> containers,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.HddsConfigKeys;
Expand Down Expand Up @@ -90,7 +91,7 @@ public class SCMSafeModeManager implements SafeModeManager {
private AtomicBoolean preCheckComplete = new AtomicBoolean(false);
private AtomicBoolean forceExitSafeMode = new AtomicBoolean(false);

private Map<String, SafeModeExitRule> exitRules = new HashMap(1);
private Map<String, SafeModeExitRule> exitRules = new HashMap<>(1);
private Set<String> preCheckRules = new HashSet<>(1);
private ConfigurationSource config;
private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
Expand All @@ -110,6 +111,8 @@ public class SCMSafeModeManager implements SafeModeManager {

private final SafeModeMetrics safeModeMetrics;


// TODO: Remove allContainers argument. (HDDS-11795)
public SCMSafeModeManager(ConfigurationSource conf,
List<ContainerInfo> allContainers,
ContainerManager containerManager, PipelineManager pipelineManager,
Expand All @@ -126,30 +129,17 @@ public SCMSafeModeManager(ConfigurationSource conf,

if (isSafeModeEnabled) {
this.safeModeMetrics = SafeModeMetrics.create();
ContainerSafeModeRule containerSafeModeRule =
new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
allContainers, containerManager, this);
DataNodeSafeModeRule dataNodeSafeModeRule =
new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, this);
exitRules.put(CONT_EXIT_RULE, containerSafeModeRule);
exitRules.put(DN_EXIT_RULE, dataNodeSafeModeRule);
preCheckRules.add(DN_EXIT_RULE);
if (conf.getBoolean(
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
&& pipelineManager != null) {
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
new HealthyPipelineSafeModeRule(HEALTHY_PIPELINE_EXIT_RULE,
eventQueue, pipelineManager,
this, config, scmContext);
OneReplicaPipelineSafeModeRule oneReplicaPipelineSafeModeRule =
new OneReplicaPipelineSafeModeRule(
ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
pipelineManager, this, conf);
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, healthyPipelineSafeModeRule);
exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE,
oneReplicaPipelineSafeModeRule);
}

// TODO: Remove the cyclic ("this") dependency (HDDS-11797)
SafeModeRuleFactory.initialize(config, scmContext, eventQueue,
this, pipelineManager, containerManager);
SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance();

exitRules = factory.getSafeModeRules().stream().collect(
Collectors.toMap(SafeModeExitRule::getRuleName, rule -> rule));

preCheckRules = factory.getPreCheckRules().stream()
.map(SafeModeExitRule::getRuleName).collect(Collectors.toSet());
} else {
this.safeModeMetrics = null;
exitSafeMode(eventQueue, true);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdds.scm.safemode;


import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Factory to create SafeMode rules.
 *
 * <p>The factory is held as a static singleton. {@link #initialize} builds a
 * new instance, replacing any previously initialized one, and constructs all
 * rules eagerly in the constructor. {@link #getInstance()} returns the most
 * recently initialized instance and fails if {@code initialize} has not been
 * called yet. Both static methods are {@code synchronized} on the class.
 */
public final class SafeModeRuleFactory {

  private static final Logger LOG = LoggerFactory.getLogger(SafeModeRuleFactory.class);

  // TODO: Move the rule names to respective rules. (HDDS-11798)
  private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
  private static final String DN_EXIT_RULE = "DataNodeSafeModeRule";
  private static final String HEALTHY_PIPELINE_EXIT_RULE =
      "HealthyPipelineSafeModeRule";
  private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE =
      "AtleastOneDatanodeReportedRule";

  private final ConfigurationSource config;
  private final SCMContext scmContext;
  private final EventQueue eventQueue;

  // TODO: Remove dependency on safeModeManager (HDDS-11797)
  private final SCMSafeModeManager safeModeManager;
  private final PipelineManager pipelineManager;
  private final ContainerManager containerManager;

  // All rules created by this factory, in creation order.
  private final List<SafeModeExitRule<?>> safeModeRules;
  // Subset of safeModeRules that are registered as pre-check rules.
  private final List<SafeModeExitRule<?>> preCheckRules;

  private static SafeModeRuleFactory instance;

  /**
   * Stores the collaborators and immediately creates all rules.
   *
   * @param pipelineManager may be {@code null}; in that case the pipeline
   *     based rules are not created
   */
  private SafeModeRuleFactory(final ConfigurationSource config,
      final SCMContext scmContext,
      final EventQueue eventQueue,
      final SCMSafeModeManager safeModeManager,
      final PipelineManager pipelineManager,
      final ContainerManager containerManager) {
    this.config = config;
    this.scmContext = scmContext;
    this.eventQueue = eventQueue;
    this.safeModeManager = safeModeManager;
    this.pipelineManager = pipelineManager;
    this.containerManager = containerManager;
    this.safeModeRules = new ArrayList<>();
    this.preCheckRules = new ArrayList<>();
    loadRules();
  }

  /**
   * Instantiates the rules and records them in {@code safeModeRules} and,
   * for the datanode rule, also in {@code preCheckRules}.
   *
   * <p>The container and datanode rules are always created. The two pipeline
   * rules are created only when the pipeline availability check is enabled in
   * the configuration and a pipeline manager was supplied.
   */
  private void loadRules() {
    // TODO: Use annotation to load the rules. (HDDS-11730)
    safeModeRules.add(new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
        containerManager, safeModeManager));
    SafeModeExitRule<?> dnRule = new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, safeModeManager);
    safeModeRules.add(dnRule);
    preCheckRules.add(dnRule);

    // TODO: Move isRuleEnabled check to the Rule implementation. (HDDS-11799)
    if (config.getBoolean(
        HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
        HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
        && pipelineManager != null) {

      safeModeRules.add(new HealthyPipelineSafeModeRule(HEALTHY_PIPELINE_EXIT_RULE,
          eventQueue, pipelineManager, safeModeManager, config, scmContext));
      safeModeRules.add(new OneReplicaPipelineSafeModeRule(
          ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
          pipelineManager, safeModeManager, config));
    }

  }

  /**
   * Returns the factory created by the most recent call to
   * {@link #initialize}.
   *
   * @return the current factory instance
   * @throws IllegalStateException if {@code initialize} has not been called
   */
  public static synchronized SafeModeRuleFactory getInstance() {
    if (instance != null) {
      return instance;
    }
    throw new IllegalStateException("SafeModeRuleFactory not initialized," +
        " call initialize method before getInstance.");
  }

  /**
   * Creates a new factory from the given collaborators and installs it as
   * the singleton instance, replacing any existing one. All rules are
   * constructed as part of this call.
   */
  // TODO: Refactor and reduce the arguments. (HDDS-11800)
  public static synchronized void initialize(
      final ConfigurationSource config,
      final SCMContext scmContext,
      final EventQueue eventQueue,
      final SCMSafeModeManager safeModeManager,
      final PipelineManager pipelineManager,
      final ContainerManager containerManager) {
    instance = new SafeModeRuleFactory(config, scmContext, eventQueue,
        safeModeManager, pipelineManager, containerManager);
  }

  /**
   * Returns every rule created by this factory, in creation order.
   *
   * @return an unmodifiable view of the rules; callers cannot add or remove
   *     rules through it
   */
  public List<SafeModeExitRule<?>> getSafeModeRules() {
    // Hand out a read-only view so the factory's internal list cannot be
    // altered from outside.
    return Collections.unmodifiableList(safeModeRules);
  }

  /**
   * Returns the rules that were registered as pre-check rules.
   *
   * @return an unmodifiable view of the pre-check rules; callers cannot add
   *     or remove rules through it
   */
  public List<SafeModeExitRule<?>> getPreCheckRules() {
    // Hand out a read-only view so the factory's internal list cannot be
    // altered from outside.
    return Collections.unmodifiableList(preCheckRules);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ public void testClosePipelineShouldFailOnFollower() throws Exception {
public void testPipelineReport() throws Exception {
try (PipelineManagerImpl pipelineManager = createPipelineManager(true)) {
SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(conf, new ArrayList<>(), null, pipelineManager,
new SCMSafeModeManager(conf, new ArrayList<>(),
mock(ContainerManager.class), pipelineManager,
new EventQueue(), serviceManager, scmContext);
Pipeline pipeline = pipelineManager
.createPipeline(RatisReplicationConfig
Expand Down Expand Up @@ -469,7 +470,7 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws Exception {

SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(new OzoneConfiguration(), new ArrayList<>(),
null, pipelineManager, new EventQueue(),
mock(ContainerManager.class), pipelineManager, new EventQueue(),
serviceManager, scmContext);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(scmSafeModeManager, pipelineManager,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
Expand All @@ -50,6 +51,8 @@

import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
* This class tests HealthyPipelineSafeMode rule.
Expand All @@ -69,6 +72,8 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines()

OzoneConfiguration config = new OzoneConfiguration();
MockNodeManager nodeManager = new MockNodeManager(true, 0);
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers()).thenReturn(containers);
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFile.getPath());
// enable pipeline check
config.setBoolean(
Expand All @@ -94,7 +99,7 @@ public void testHealthyPipelineSafeModeRuleWithNoPipelines()
pipelineManager.setPipelineProvider(HddsProtos.ReplicationType.RATIS,
mockRatisProvider);
SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containers, null, pipelineManager, eventQueue,
config, containers, containerManager, pipelineManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand All @@ -121,6 +126,8 @@ public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
// stale and last one is dead, and this repeats. So for a 12 node, 9
// healthy, 2 stale and one dead.
MockNodeManager nodeManager = new MockNodeManager(true, 12);
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers()).thenReturn(containers);
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFile.getPath());
// enable pipeline check
config.setBoolean(
Expand Down Expand Up @@ -172,7 +179,7 @@ public void testHealthyPipelineSafeModeRuleWithPipelines() throws Exception {
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);

SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containers, null, pipelineManager, eventQueue,
config, containers, containerManager, pipelineManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand Down Expand Up @@ -215,6 +222,8 @@ public void testHealthyPipelineSafeModeRuleWithMixedPipelines()
// stale and last one is dead, and this repeats. So for a 12 node, 9
// healthy, 2 stale and one dead.
MockNodeManager nodeManager = new MockNodeManager(true, 12);
ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers()).thenReturn(containers);
config.set(HddsConfigKeys.OZONE_METADATA_DIRS, tempFile.getPath());
// enable pipeline check
config.setBoolean(
Expand Down Expand Up @@ -266,7 +275,7 @@ public void testHealthyPipelineSafeModeRuleWithMixedPipelines()
MockRatisPipelineProvider.markPipelineHealthy(pipeline3);

SCMSafeModeManager scmSafeModeManager = new SCMSafeModeManager(
config, containers, null, pipelineManager, eventQueue,
config, containers, containerManager, pipelineManager, eventQueue,
serviceManager, scmContext);

HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.PipelineReport;
import org.apache.hadoop.hdds.scm.HddsTestUtils;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.container.MockNodeManager;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
import org.apache.hadoop.hdds.scm.ha.SCMHAManagerStub;
Expand All @@ -58,6 +59,8 @@
import org.slf4j.LoggerFactory;

import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

/**
* This class tests OneReplicaPipelineSafeModeRule.
Expand Down Expand Up @@ -86,7 +89,8 @@ private void setup(int nodes, int pipelineFactorThreeCount,
List<ContainerInfo> containers = new ArrayList<>();
containers.addAll(HddsTestUtils.getContainerInfo(1));
mockNodeManager = new MockNodeManager(true, nodes);

ContainerManager containerManager = mock(ContainerManager.class);
when(containerManager.getContainers()).thenReturn(containers);
eventQueue = new EventQueue();
serviceManager = new SCMServiceManager();
scmContext = SCMContext.emptyContext();
Expand Down Expand Up @@ -116,7 +120,7 @@ private void setup(int nodes, int pipelineFactorThreeCount,
HddsProtos.ReplicationFactor.ONE);

SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(ozoneConfiguration, containers, null,
new SCMSafeModeManager(ozoneConfiguration, containers, containerManager,
pipelineManager, eventQueue, serviceManager, scmContext);

rule = scmSafeModeManager.getOneReplicaPipelineSafeModeRule();
Expand Down
Loading

0 comments on commit befd64e

Please sign in to comment.