Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into HDDS-11852
Browse files Browse the repository at this point in the history
  • Loading branch information
adoroszlai committed Dec 10, 2024
2 parents 4881e2e + ec348a7 commit 7c3ba57
Show file tree
Hide file tree
Showing 31 changed files with 1,998 additions and 176 deletions.
2 changes: 1 addition & 1 deletion .asf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
github:
description: "Scalable, redundant, and distributed object store for Apache Hadoop"
description: "Scalable, reliable, distributed storage system optimized for data analytics and object store workloads."
homepage: https://ozone.apache.org
labels:
- hadoop
Expand Down
31 changes: 29 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ on:
env:
FAIL_FAST: ${{ github.event_name == 'pull_request' }}
# Minimum required Java version for running Ozone is defined in pom.xml (javac.version).
TEST_JAVA_VERSION: 17 # JDK version used by CI build and tests; should match the JDK version in apache/ozone-runner image
TEST_JAVA_VERSION: 21 # JDK version used by CI build and tests; should match the JDK version in apache/ozone-runner image
MAVEN_OPTS: -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false -Dmaven.wagon.http.retryHandler.class=standard -Dmaven.wagon.http.retryHandler.count=3
HADOOP_IMAGE: ghcr.io/apache/hadoop
OZONE_IMAGE: ghcr.io/apache/ozone
Expand Down Expand Up @@ -257,7 +257,7 @@ jobs:
key: maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
maven-repo-
if: ${{ !contains('author,bats,docs', matrix.check) }}
if: ${{ !contains('author,bats', matrix.check) }}
- name: Download Ratis repo
if: ${{ inputs.ratis_args != '' }}
uses: actions/download-artifact@v4
Expand Down Expand Up @@ -343,6 +343,15 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ needs.build-info.outputs.sha }}
- name: Cache for maven dependencies
uses: actions/cache/restore@v4
with:
path: |
~/.m2/repository/*/*/*
!~/.m2/repository/org/apache/ozone
key: maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
maven-repo-
- name: Download compiled Ozone binaries
uses: actions/download-artifact@v4
with:
Expand Down Expand Up @@ -486,6 +495,15 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ needs.build-info.outputs.sha }}
- name: Cache for maven dependencies
uses: actions/cache/restore@v4
with:
path: |
~/.m2/repository/*/*/*
!~/.m2/repository/org/apache/ozone
key: maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
maven-repo-
- name: Download compiled Ozone binaries
uses: actions/download-artifact@v4
with:
Expand Down Expand Up @@ -530,6 +548,15 @@ jobs:
uses: actions/checkout@v4
with:
ref: ${{ needs.build-info.outputs.sha }}
- name: Cache for maven dependencies
uses: actions/cache/restore@v4
with:
path: |
~/.m2/repository/*/*/*
!~/.m2/repository/org/apache/ozone
key: maven-repo-${{ hashFiles('**/pom.xml') }}
restore-keys: |
maven-repo-
- name: Download compiled Ozone binaries
uses: actions/download-artifact@v4
with:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/populate-cache.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ jobs:

- name: Fetch dependencies
if: steps.restore-cache.outputs.cache-hit != 'true'
run: mvn --batch-mode --no-transfer-progress --show-version -Pgo-offline -Pdist clean verify
run: mvn --batch-mode --no-transfer-progress --show-version -Pgo-offline -Pdist -Drocks_tools_native clean verify

- name: Delete Ozone jars from repo
if: steps.restore-cache.outputs.cache-hit != 'true'
Expand Down
12 changes: 0 additions & 12 deletions hadoop-hdds/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,6 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd">
<module>rocks-native</module>
</modules>

<repositories>
<repository>
<id>apache.snapshots.https</id>
<url>https://repository.apache.org/content/repositories/snapshots</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>apache.snapshots.https</id>
<url>https://repository.apache.org/content/repositories/snapshots</url>
</pluginRepository>
</pluginRepositories>
<dependencyManagement>
<dependencies>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ public class ContainerSafeModeRule extends
private AtomicLong ecContainerWithMinReplicas = new AtomicLong(0);
private final ContainerManager containerManager;

/**
 * Convenience constructor that takes the container list from the given
 * {@code containerManager} instead of requiring the caller to pass it.
 *
 * <p>Delegates to the overloaded constructor, supplying
 * {@code containerManager.getContainers()} as the list of containers.
 * {@code containerManager} is dereferenced here, so it must not be null.
 *
 * @param ruleName name of this rule
 * @param eventQueue event queue passed through to the delegate constructor
 * @param conf configuration passed through to the delegate constructor
 * @param containerManager source of the container list; also passed through
 * @param manager safe mode manager passed through to the delegate constructor
 */
public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
ConfigurationSource conf,
ContainerManager containerManager, SCMSafeModeManager manager) {
this(ruleName, eventQueue, conf, containerManager.getContainers(), containerManager, manager);
}

public ContainerSafeModeRule(String ruleName, EventQueue eventQueue,
ConfigurationSource conf,
List<ContainerInfo> containers,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hdds.HddsConfigKeys;
Expand Down Expand Up @@ -90,7 +91,7 @@ public class SCMSafeModeManager implements SafeModeManager {
private AtomicBoolean preCheckComplete = new AtomicBoolean(false);
private AtomicBoolean forceExitSafeMode = new AtomicBoolean(false);

private Map<String, SafeModeExitRule> exitRules = new HashMap(1);
private Map<String, SafeModeExitRule> exitRules = new HashMap<>(1);
private Set<String> preCheckRules = new HashSet<>(1);
private ConfigurationSource config;
private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
Expand All @@ -110,6 +111,8 @@ public class SCMSafeModeManager implements SafeModeManager {

private final SafeModeMetrics safeModeMetrics;


// TODO: Remove allContainers argument. (HDDS-11795)
public SCMSafeModeManager(ConfigurationSource conf,
List<ContainerInfo> allContainers,
ContainerManager containerManager, PipelineManager pipelineManager,
Expand All @@ -126,30 +129,17 @@ public SCMSafeModeManager(ConfigurationSource conf,

if (isSafeModeEnabled) {
this.safeModeMetrics = SafeModeMetrics.create();
ContainerSafeModeRule containerSafeModeRule =
new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
allContainers, containerManager, this);
DataNodeSafeModeRule dataNodeSafeModeRule =
new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, this);
exitRules.put(CONT_EXIT_RULE, containerSafeModeRule);
exitRules.put(DN_EXIT_RULE, dataNodeSafeModeRule);
preCheckRules.add(DN_EXIT_RULE);
if (conf.getBoolean(
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
&& pipelineManager != null) {
HealthyPipelineSafeModeRule healthyPipelineSafeModeRule =
new HealthyPipelineSafeModeRule(HEALTHY_PIPELINE_EXIT_RULE,
eventQueue, pipelineManager,
this, config, scmContext);
OneReplicaPipelineSafeModeRule oneReplicaPipelineSafeModeRule =
new OneReplicaPipelineSafeModeRule(
ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
pipelineManager, this, conf);
exitRules.put(HEALTHY_PIPELINE_EXIT_RULE, healthyPipelineSafeModeRule);
exitRules.put(ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE,
oneReplicaPipelineSafeModeRule);
}

// TODO: Remove the cyclic ("this") dependency (HDDS-11797)
SafeModeRuleFactory.initialize(config, scmContext, eventQueue,
this, pipelineManager, containerManager);
SafeModeRuleFactory factory = SafeModeRuleFactory.getInstance();

exitRules = factory.getSafeModeRules().stream().collect(
Collectors.toMap(SafeModeExitRule::getRuleName, rule -> rule));

preCheckRules = factory.getPreCheckRules().stream()
.map(SafeModeExitRule::getRuleName).collect(Collectors.toSet());
} else {
this.safeModeMetrics = null;
exitSafeMode(eventQueue, true);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*      http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.hdds.scm.safemode;


import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * Factory to create SafeMode rules.
 *
 * <p>The factory is a singleton: {@link #initialize} builds all rules and
 * publishes a new instance (replacing any previously published one), and
 * {@link #getInstance()} returns the most recently published instance. Both
 * static entry points are {@code synchronized} on the class.
 *
 * <p>The rule lists are populated once, in the constructor, and are exposed
 * only as unmodifiable views so that callers cannot alter the rule set of the
 * shared instance.
 */
public final class SafeModeRuleFactory {

  private static final Logger LOG = LoggerFactory.getLogger(SafeModeRuleFactory.class);

  // TODO: Move the rule names to respective rules. (HDDS-11798)
  private static final String CONT_EXIT_RULE = "ContainerSafeModeRule";
  private static final String DN_EXIT_RULE = "DataNodeSafeModeRule";
  private static final String HEALTHY_PIPELINE_EXIT_RULE =
      "HealthyPipelineSafeModeRule";
  private static final String ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE =
      "AtleastOneDatanodeReportedRule";

  private final ConfigurationSource config;
  private final SCMContext scmContext;
  private final EventQueue eventQueue;

  // TODO: Remove dependency on safeModeManager (HDDS-11797)
  private final SCMSafeModeManager safeModeManager;
  private final PipelineManager pipelineManager;
  private final ContainerManager containerManager;

  private final List<SafeModeExitRule<?>> safeModeRules;
  private final List<SafeModeExitRule<?>> preCheckRules;

  private static SafeModeRuleFactory instance;

  /**
   * Stores the collaborators and eagerly creates all rules via
   * {@link #loadRules()}.
   */
  private SafeModeRuleFactory(final ConfigurationSource config,
      final SCMContext scmContext,
      final EventQueue eventQueue,
      final SCMSafeModeManager safeModeManager,
      final PipelineManager pipelineManager,
      final ContainerManager containerManager) {
    this.config = config;
    this.scmContext = scmContext;
    this.eventQueue = eventQueue;
    this.safeModeManager = safeModeManager;
    this.pipelineManager = pipelineManager;
    this.containerManager = containerManager;
    this.safeModeRules = new ArrayList<>();
    this.preCheckRules = new ArrayList<>();
    loadRules();
  }

  /**
   * Creates the rule instances and registers them in the internal lists.
   *
   * <p>The container rule and the datanode rule are always created; the
   * datanode rule is additionally registered as a pre-check rule. The two
   * pipeline rules are created only when the pipeline availability check is
   * enabled in the configuration and a pipeline manager was supplied.
   */
  private void loadRules() {
    // TODO: Use annotation to load the rules. (HDDS-11730)
    safeModeRules.add(new ContainerSafeModeRule(CONT_EXIT_RULE, eventQueue, config,
        containerManager, safeModeManager));
    SafeModeExitRule<?> dnRule = new DataNodeSafeModeRule(DN_EXIT_RULE, eventQueue, config, safeModeManager);
    safeModeRules.add(dnRule);
    preCheckRules.add(dnRule);

    // TODO: Move isRuleEnabled check to the Rule implementation. (HDDS-11799)
    if (config.getBoolean(
        HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK,
        HddsConfigKeys.HDDS_SCM_SAFEMODE_PIPELINE_AVAILABILITY_CHECK_DEFAULT)
        && pipelineManager != null) {

      safeModeRules.add(new HealthyPipelineSafeModeRule(HEALTHY_PIPELINE_EXIT_RULE,
          eventQueue, pipelineManager, safeModeManager, config, scmContext));
      safeModeRules.add(new OneReplicaPipelineSafeModeRule(
          ATLEAST_ONE_DATANODE_REPORTED_PIPELINE_EXIT_RULE, eventQueue,
          pipelineManager, safeModeManager, config));
    }

  }

  /**
   * Returns the factory published by the most recent call to
   * {@link #initialize}.
   *
   * @return the current factory instance, never null
   * @throws IllegalStateException if {@link #initialize} has not been called
   */
  public static synchronized SafeModeRuleFactory getInstance() {
    if (instance != null) {
      return instance;
    }
    throw new IllegalStateException("SafeModeRuleFactory not initialized," +
        " call initialize method before getInstance.");
  }

  /**
   * Builds a new factory (creating all rules) and publishes it as the
   * singleton instance. Calling this again replaces the previous instance.
   *
   * @param config configuration handed to every rule; also read to decide
   *               whether the pipeline rules are created
   * @param scmContext context handed to the healthy-pipeline rule
   * @param eventQueue event queue handed to every rule
   * @param safeModeManager safe mode manager handed to every rule
   * @param pipelineManager pipeline manager for the pipeline rules; when null
   *                        the pipeline rules are not created
   * @param containerManager container manager handed to the container rule
   */
  // TODO: Refactor and reduce the arguments. (HDDS-11800)
  public static synchronized void initialize(
      final ConfigurationSource config,
      final SCMContext scmContext,
      final EventQueue eventQueue,
      final SCMSafeModeManager safeModeManager,
      final PipelineManager pipelineManager,
      final ContainerManager containerManager) {
    instance = new SafeModeRuleFactory(config, scmContext, eventQueue,
        safeModeManager, pipelineManager, containerManager);
  }

  /**
   * Returns all rules created by this factory.
   *
   * @return an unmodifiable view of the rules, in creation order
   */
  public List<SafeModeExitRule<?>> getSafeModeRules() {
    // Read-only view: the rule set of the shared singleton must not be
    // altered by callers.
    return Collections.unmodifiableList(safeModeRules);
  }

  /**
   * Returns the subset of rules registered as pre-check rules.
   *
   * @return an unmodifiable view of the pre-check rules
   */
  public List<SafeModeExitRule<?>> getPreCheckRules() {
    return Collections.unmodifiableList(preCheckRules);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ public void testClosePipelineShouldFailOnFollower() throws Exception {
public void testPipelineReport() throws Exception {
try (PipelineManagerImpl pipelineManager = createPipelineManager(true)) {
SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(conf, new ArrayList<>(), null, pipelineManager,
new SCMSafeModeManager(conf, new ArrayList<>(),
mock(ContainerManager.class), pipelineManager,
new EventQueue(), serviceManager, scmContext);
Pipeline pipeline = pipelineManager
.createPipeline(RatisReplicationConfig
Expand Down Expand Up @@ -469,7 +470,7 @@ public void testPipelineOpenOnlyWhenLeaderReported() throws Exception {

SCMSafeModeManager scmSafeModeManager =
new SCMSafeModeManager(new OzoneConfiguration(), new ArrayList<>(),
null, pipelineManager, new EventQueue(),
mock(ContainerManager.class), pipelineManager, new EventQueue(),
serviceManager, scmContext);
PipelineReportHandler pipelineReportHandler =
new PipelineReportHandler(scmSafeModeManager, pipelineManager,
Expand Down
Loading

0 comments on commit 7c3ba57

Please sign in to comment.