Skip to content

Commit 3833c61

Browse files
author
Chen Liang
committed
HDFS-15404. ShellCommandFencer should expose info about source. Contributed by Chen Liang.
1 parent 736bed6 commit 3833c61

File tree

8 files changed

+127
-15
lines changed

8 files changed

+127
-15
lines changed

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/FailoverController.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ public void failover(HAServiceTarget fromSvc,
213213

214214
// Fence fromSvc if it's required or forced by the user
215215
if (tryFence) {
216-
if (!fromSvc.getFencer().fence(fromSvc)) {
216+
if (!fromSvc.getFencer().fence(fromSvc, toSvc)) {
217217
throw new FailoverFailedException("Unable to fence " +
218218
fromSvc + ". Fencing failed.");
219219
}

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ public abstract class HAServiceTarget {
4444
private static final String PORT_SUBST_KEY = "port";
4545
private static final String ADDRESS_SUBST_KEY = "address";
4646

47+
/**
48+
* The HAState this service target is intended to be after transition
49+
* is complete.
50+
*/
51+
private HAServiceProtocol.HAServiceState transitionTargetHAStatus;
52+
4753
/**
4854
* @return the IPC address of the target node.
4955
*/
@@ -93,6 +99,15 @@ public HAServiceProtocol getProxy(Configuration conf, int timeoutMs)
9399
return getProxyForAddress(conf, timeoutMs, getAddress());
94100
}
95101

102+
public void setTransitionTargetHAStatus(
103+
HAServiceProtocol.HAServiceState status) {
104+
this.transitionTargetHAStatus = status;
105+
}
106+
107+
public HAServiceProtocol.HAServiceState getTransitionTargetHAStatus() {
108+
return this.transitionTargetHAStatus;
109+
}
110+
96111
/**
97112
* Returns a proxy to connect to the target HA service for health monitoring.
98113
* If {@link #getHealthMonitorAddress()} is implemented to return a non-null

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/NodeFencer.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,32 @@ public static NodeFencer create(Configuration conf, String confKey)
8989
}
9090

9191
public boolean fence(HAServiceTarget fromSvc) {
92+
return fence(fromSvc, null);
93+
}
94+
95+
public boolean fence(HAServiceTarget fromSvc, HAServiceTarget toSvc) {
9296
LOG.info("====== Beginning Service Fencing Process... ======");
9397
int i = 0;
9498
for (FenceMethodWithArg method : methods) {
9599
LOG.info("Trying method " + (++i) + "/" + methods.size() +": " + method);
96100

97101
try {
98-
if (method.method.tryFence(fromSvc, method.arg)) {
99-
LOG.info("====== Fencing successful by method " + method + " ======");
100-
return true;
102+
// only true when target node is given, AND fencing on it failed
103+
boolean toSvcFencingFailed = false;
104+
// if target is given, try to fence on target first. Only if fencing
105+
// on target succeeded, do fencing on source node.
106+
if (toSvc != null) {
107+
toSvcFencingFailed = !method.method.tryFence(toSvc, method.arg);
108+
}
109+
if (toSvcFencingFailed) {
110+
LOG.error("====== Fencing on target failed, skipping fencing "
111+
+ "on source ======");
112+
} else {
113+
if (method.method.tryFence(fromSvc, method.arg)) {
114+
LOG.info("====== Fencing successful by method "
115+
+ method + " ======");
116+
return true;
117+
}
101118
}
102119
} catch (BadFencingConfigurationException e) {
103120
LOG.error("Fencing method " + method + " misconfigured", e);

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
import java.io.IOException;
2121
import java.lang.reflect.Field;
22+
import java.util.Arrays;
2223
import java.util.Map;
2324

2425
import org.apache.hadoop.conf.Configured;
@@ -60,6 +61,11 @@ public class ShellCommandFencer
6061
/** Prefix for target parameters added to the environment */
6162
private static final String TARGET_PREFIX = "target_";
6263

64+
/** Prefix for source parameters added to the environment */
65+
private static final String SOURCE_PREFIX = "source_";
66+
67+
private static final String ARG_DELIMITER = ",";
68+
6369
@VisibleForTesting
6470
static Logger LOG = LoggerFactory.getLogger(ShellCommandFencer.class);
6571

@@ -73,8 +79,9 @@ public void checkArgs(String args) throws BadFencingConfigurationException {
7379
}
7480

7581
@Override
76-
public boolean tryFence(HAServiceTarget target, String cmd) {
82+
public boolean tryFence(HAServiceTarget target, String args) {
7783
ProcessBuilder builder;
84+
String cmd = parseArgs(target.getTransitionTargetHAStatus(), args);
7885

7986
if (!Shell.WINDOWS) {
8087
builder = new ProcessBuilder("bash", "-e", "-c", cmd);
@@ -127,6 +134,28 @@ public boolean tryFence(HAServiceTarget target, String cmd) {
127134
return rc == 0;
128135
}
129136

137+
private String parseArgs(HAServiceProtocol.HAServiceState state,
138+
String cmd) {
139+
String[] args = cmd.split(ARG_DELIMITER);
140+
if (args.length == 1) {
141+
// only one command is given, assuming both src and dst
142+
// will execute the same command/script.
143+
return args[0];
144+
}
145+
if (args.length > 2) {
146+
throw new IllegalArgumentException("Expecting arguments size of at most "
147+
+ "two, getting " + Arrays.asList(args));
148+
}
149+
if (HAServiceProtocol.HAServiceState.ACTIVE.equals(state)) {
150+
return args[0];
151+
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(state)) {
152+
return args[1];
153+
} else {
154+
throw new IllegalArgumentException(
155+
"Unexpected HA service state:" + state);
156+
}
157+
}
158+
130159
/**
131160
* Abbreviate a string by putting '...' in the middle of it,
132161
* in an attempt to keep logs from getting too messy.
@@ -190,9 +219,24 @@ private void setConfAsEnvVars(Map<String, String> env) {
190219
*/
191220
private void addTargetInfoAsEnvVars(HAServiceTarget target,
192221
Map<String, String> environment) {
222+
String prefix;
223+
HAServiceProtocol.HAServiceState targetState =
224+
target.getTransitionTargetHAStatus();
225+
if (targetState == null ||
226+
HAServiceProtocol.HAServiceState.ACTIVE.equals(targetState)) {
227+
// null is assumed to be same as ACTIVE, this is to be compatible
228+
// with existing tests/use cases where target state is not specified
229+
// but assuming it's active.
230+
prefix = TARGET_PREFIX;
231+
} else if (HAServiceProtocol.HAServiceState.STANDBY.equals(targetState)) {
232+
prefix = SOURCE_PREFIX;
233+
} else {
234+
throw new IllegalArgumentException(
235+
"Unexpected HA service state:" + targetState);
236+
}
193237
for (Map.Entry<String, String> e :
194238
target.getFencingParameters().entrySet()) {
195-
String key = TARGET_PREFIX + e.getKey();
239+
String key = prefix + e.getKey();
196240
key = key.replace('.', '_');
197241
environment.put(key, e.getValue());
198242
}

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestFailoverController.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ public void testFailoverFromFaultyServiceSucceeds() throws Exception {
177177
}
178178

179179
// svc1 still thinks it's active, that's OK, it was fenced
180-
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
180+
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
181181
assertSame(svc1, AlwaysSucceedFencer.fencedSvc);
182182
assertEquals(HAServiceState.ACTIVE, svc1.state);
183183
assertEquals(HAServiceState.ACTIVE, svc2.state);
@@ -201,7 +201,7 @@ public void testFailoverFromFaultyServiceFencingFailure() throws Exception {
201201
}
202202

203203
assertEquals(1, AlwaysFailFencer.fenceCalled);
204-
assertSame(svc1, AlwaysFailFencer.fencedSvc);
204+
assertSame(svc2, AlwaysFailFencer.fencedSvc);
205205
assertEquals(HAServiceState.ACTIVE, svc1.state);
206206
assertEquals(HAServiceState.STANDBY, svc2.state);
207207
}
@@ -223,7 +223,7 @@ public void testFencingFailureDuringFailover() throws Exception {
223223
// If fencing was requested and it failed we don't try to make
224224
// svc2 active anyway, and we don't failback to svc1.
225225
assertEquals(1, AlwaysFailFencer.fenceCalled);
226-
assertSame(svc1, AlwaysFailFencer.fencedSvc);
226+
assertSame(svc2, AlwaysFailFencer.fencedSvc);
227227
assertEquals(HAServiceState.STANDBY, svc1.state);
228228
assertEquals(HAServiceState.STANDBY, svc2.state);
229229
}
@@ -344,7 +344,7 @@ public void testWeFenceOnFailbackIfTransitionToActiveFails() throws Exception {
344344
// and we didn't force it, so we failed back to svc1 and fenced svc2.
345345
// Note svc2 still thinks it's active, that's OK, we fenced it.
346346
assertEquals(HAServiceState.ACTIVE, svc1.state);
347-
assertEquals(1, AlwaysSucceedFencer.fenceCalled);
347+
assertEquals(2, AlwaysSucceedFencer.fenceCalled);
348348
assertSame(svc2, AlwaysSucceedFencer.fencedSvc);
349349
}
350350

@@ -373,7 +373,7 @@ public void testFailureToFenceOnFailbackFailsTheFailback() throws Exception {
373373
// so we did not failback to svc1, ie it's still standby.
374374
assertEquals(HAServiceState.STANDBY, svc1.state);
375375
assertEquals(1, AlwaysFailFencer.fenceCalled);
376-
assertSame(svc2, AlwaysFailFencer.fencedSvc);
376+
assertSame(svc1, AlwaysFailFencer.fencedSvc);
377377
}
378378

379379
@Test

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,37 @@ public void testTargetAsEnvironment() {
163163
}
164164
}
165165

166+
/**
167+
* Test if fencing target has peer set, the failover can trigger different
168+
* commands on source and destination respectively.
169+
*/
170+
@Test
171+
public void testEnvironmentWithPeer() {
172+
HAServiceTarget target = new DummyHAService(HAServiceState.ACTIVE,
173+
new InetSocketAddress("dummytarget", 1111));
174+
HAServiceTarget source = new DummyHAService(HAServiceState.STANDBY,
175+
new InetSocketAddress("dummysource", 2222));
176+
target.setTransitionTargetHAStatus(HAServiceState.ACTIVE);
177+
source.setTransitionTargetHAStatus(HAServiceState.STANDBY);
178+
String cmd = "echo $target_host $target_port,"
179+
+ "echo $source_host $source_port";
180+
if (!Shell.WINDOWS) {
181+
fencer.tryFence(target, cmd);
182+
Mockito.verify(ShellCommandFencer.LOG).info(
183+
Mockito.contains("echo $ta...rget_port: dummytarget 1111"));
184+
fencer.tryFence(source, cmd);
185+
Mockito.verify(ShellCommandFencer.LOG).info(
186+
Mockito.contains("echo $so...urce_port: dummysource 2222"));
187+
} else {
188+
fencer.tryFence(target, cmd);
189+
Mockito.verify(ShellCommandFencer.LOG).info(
190+
Mockito.contains("echo %ta...get_port%: dummytarget 1111"));
191+
fencer.tryFence(source, cmd);
192+
Mockito.verify(ShellCommandFencer.LOG).info(
193+
Mockito.contains("echo %so...urce_port%: dummysource 2222"));
194+
}
195+
}
196+
166197

167198
/**
168199
* Test that we properly close off our input to the subprocess

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSHAAdmin.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,11 @@ private int failover(CommandLine cmd)
284284
HAServiceTarget fromNode = resolveTarget(args[0]);
285285
HAServiceTarget toNode = resolveTarget(args[1]);
286286

287+
fromNode.setTransitionTargetHAStatus(
288+
HAServiceProtocol.HAServiceState.STANDBY);
289+
toNode.setTransitionTargetHAStatus(
290+
HAServiceProtocol.HAServiceState.ACTIVE);
291+
287292
// Check that auto-failover is consistently configured for both nodes.
288293
Preconditions.checkState(
289294
fromNode.isAutoFailoverEnabled() ==

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSHAAdminMiniCluster.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,13 @@ public void testFencer() throws Exception {
189189
tmpFile.deleteOnExit();
190190
if (Shell.WINDOWS) {
191191
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
192-
"shell(echo %target_nameserviceid%.%target_namenodeid% " +
193-
"%target_port% %dfs_ha_namenode_id% > " +
192+
"shell(echo %source_nameserviceid%.%source_namenodeid% " +
193+
"%source_port% %dfs_ha_namenode_id% > " +
194194
tmpFile.getAbsolutePath() + ")");
195195
} else {
196196
conf.set(DFSConfigKeys.DFS_HA_FENCE_METHODS_KEY,
197-
"shell(echo -n $target_nameserviceid.$target_namenodeid " +
198-
"$target_port $dfs_ha_namenode_id > " +
197+
"shell(echo -n $source_nameserviceid.$source_namenodeid " +
198+
"$source_port $dfs_ha_namenode_id > " +
199199
tmpFile.getAbsolutePath() + ")");
200200
}
201201

0 commit comments

Comments
 (0)