Skip to content

Commit b844b4d

Browse files
committed
Add test for dying with dignity (#28987)
I have long wanted an actual test that dying with dignity works. It is tricky because if dying with dignity works, it means the test JVM dies which is usually an abnormal condition. And anyway, how does one force a fatal error to be thrown? I was motivated to investigate this again by the fact that I missed a backport to one branch leading to an issue where Elasticsearch would not successfully die with dignity. And now we have a solution: we install a plugin that throws an out of memory error when it receives a request. We hack the standalone test infrastructure to prevent this from failing the test. To do this, we bypass the security manager and remove the PID file for the node; this tricks the test infrastructure into thinking that it does not need to stop the node. We also bypass seccomp so that we can fork jps to make sure that Elasticsearch really died. And to be extra paranoid, we parse the logs of the dead Elasticsearch process to make sure it died with dignity. Never forget.
1 parent d61df4a commit b844b4d

File tree

6 files changed

+254
-4
lines changed

6 files changed

+254
-4
lines changed

qa/die-with-dignity/build.gradle

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
// Build script for the die-with-dignity QA project: packages the test plugin and configures the
// integration test runner with the system properties that DieWithDignityIT reads.
apply plugin: 'elasticsearch.esplugin'
21+
22+
esplugin {
23+
description 'Out of memory plugin'
24+
// plugin entry point class
classname 'org.elasticsearch.DieWithDignityPlugin'
25+
}
26+
27+
integTestRunner {
28+
// per the commit description, the security manager is bypassed so the test can remove the node's PID file
systemProperty 'tests.security.manager', 'false'
29+
// read by BootstrapForTesting; 'false' disables the system call filter so the test can fork jps
systemProperty 'tests.system_call_filter', 'false'
30+
// location of the PID file of the first test node; the ${-> ...} form is a lazily evaluated GString
systemProperty 'pidfile', "${-> integTest.getNodes().get(0).pidFile}"
31+
// log file of the first test node, named after the cluster
systemProperty 'log', "${-> integTest.getNodes().get(0).homeDir}/logs/${-> integTest.getNodes().get(0).clusterName}.log"
32+
// Java home under which the test looks for bin/jps
systemProperty 'runtime.java.home', "${project.runtimeJavaHome}"
33+
}
34+
35+
// the 'test' task is disabled for this project
test.enabled = false
36+
37+
// 'check' runs the integration tests
check.dependsOn integTest
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch;
21+
22+
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
23+
import org.elasticsearch.cluster.node.DiscoveryNodes;
24+
import org.elasticsearch.common.settings.ClusterSettings;
25+
import org.elasticsearch.common.settings.IndexScopedSettings;
26+
import org.elasticsearch.common.settings.Settings;
27+
import org.elasticsearch.common.settings.SettingsFilter;
28+
import org.elasticsearch.plugins.ActionPlugin;
29+
import org.elasticsearch.plugins.Plugin;
30+
import org.elasticsearch.rest.RestController;
31+
import org.elasticsearch.rest.RestHandler;
32+
33+
import java.util.Collections;
34+
import java.util.List;
35+
import java.util.function.Supplier;
36+
37+
public class DieWithDignityPlugin extends Plugin implements ActionPlugin {
38+
39+
@Override
40+
public List<RestHandler> getRestHandlers(
41+
final Settings settings,
42+
final RestController restController,
43+
final ClusterSettings clusterSettings,
44+
final IndexScopedSettings indexScopedSettings,
45+
final SettingsFilter settingsFilter,
46+
final IndexNameExpressionResolver indexNameExpressionResolver,
47+
final Supplier<DiscoveryNodes> nodesInCluster) {
48+
return Collections.singletonList(new RestDieWithDignityAction(settings, restController));
49+
}
50+
51+
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch;
21+
22+
import org.elasticsearch.client.node.NodeClient;
23+
import org.elasticsearch.common.settings.Settings;
24+
import org.elasticsearch.http.HttpStats;
25+
import org.elasticsearch.rest.BaseRestHandler;
26+
import org.elasticsearch.rest.BytesRestResponse;
27+
import org.elasticsearch.rest.RestController;
28+
import org.elasticsearch.rest.RestRequest;
29+
import org.elasticsearch.rest.RestStatus;
30+
31+
import java.io.IOException;
32+
33+
public class RestDieWithDignityAction extends BaseRestHandler {
34+
35+
RestDieWithDignityAction(final Settings settings, final RestController restController) {
36+
super(settings);
37+
restController.registerHandler(RestRequest.Method.GET, "/_die_with_dignity", this);
38+
}
39+
40+
@Override
41+
public String getName() {
42+
return "die_with_dignity_action";
43+
}
44+
45+
@Override
46+
protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException {
47+
throw new OutOfMemoryError("die with dignity");
48+
}
49+
50+
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.elasticsearch.qa.die_with_dignity;
21+
22+
import org.apache.http.ConnectionClosedException;
23+
import org.elasticsearch.client.Response;
24+
import org.elasticsearch.client.ResponseListener;
25+
import org.elasticsearch.common.io.PathUtils;
26+
import org.elasticsearch.test.rest.ESRestTestCase;
27+
28+
import java.io.BufferedReader;
29+
import java.io.InputStream;
30+
import java.io.InputStreamReader;
31+
import java.nio.file.Files;
32+
import java.nio.file.Path;
33+
import java.util.Iterator;
34+
import java.util.List;
35+
import java.util.concurrent.CountDownLatch;
36+
37+
import static org.hamcrest.Matchers.equalTo;
38+
import static org.hamcrest.Matchers.hasSize;
39+
import static org.hamcrest.Matchers.instanceOf;
40+
import static org.hamcrest.Matchers.not;
41+
42+
public class DieWithDignityIT extends ESRestTestCase {
43+
44+
public void testDieWithDignity() throws Exception {
45+
// deleting the PID file prevents stopping the cluster from failing since it occurs if and only if the PID file exists
46+
final Path pidFile = PathUtils.get(System.getProperty("pidfile"));
47+
final List<String> pidFileLines = Files.readAllLines(pidFile);
48+
assertThat(pidFileLines, hasSize(1));
49+
final int pid = Integer.parseInt(pidFileLines.get(0));
50+
Files.delete(pidFile);
51+
expectThrows(ConnectionClosedException.class, () -> client().performRequest("GET", "/_die_with_dignity"));
52+
53+
// the Elasticsearch process should die and disappear from the output of jps
54+
assertBusy(() -> {
55+
final String jpsPath = PathUtils.get(System.getProperty("runtime.java.home"), "bin/jps").toString();
56+
final Process process = new ProcessBuilder().command(jpsPath).start();
57+
assertThat(process.waitFor(), equalTo(0));
58+
try (InputStream is = process.getInputStream();
59+
BufferedReader in = new BufferedReader(new InputStreamReader(is, "UTF-8"))) {
60+
String line;
61+
while ((line = in.readLine()) != null) {
62+
final int currentPid = Integer.parseInt(line.split("\\s+")[0]);
63+
assertThat(line, pid, not(equalTo(currentPid)));
64+
}
65+
}
66+
});
67+
68+
// parse the logs and ensure that Elasticsearch died with the expected cause
69+
final List<String> lines = Files.readAllLines(PathUtils.get(System.getProperty("log")));
70+
71+
final Iterator<String> it = lines.iterator();
72+
73+
boolean fatalErrorOnTheNetworkLayer = false;
74+
boolean fatalErrorInThreadExiting = false;
75+
76+
while (it.hasNext() && (fatalErrorOnTheNetworkLayer == false || fatalErrorInThreadExiting == false)) {
77+
final String line = it.next();
78+
if (line.contains("fatal error on the network layer")) {
79+
fatalErrorOnTheNetworkLayer = true;
80+
} else if (line.matches(".*\\[ERROR\\]\\[o.e.b.ElasticsearchUncaughtExceptionHandler\\] \\[node-0\\]"
81+
+ " fatal error in thread \\[Thread-\\d+\\], exiting$")) {
82+
fatalErrorInThreadExiting = true;
83+
assertTrue(it.hasNext());
84+
assertThat(it.next(), equalTo("java.lang.OutOfMemoryError: die with dignity"));
85+
}
86+
}
87+
88+
assertTrue(fatalErrorOnTheNetworkLayer);
89+
assertTrue(fatalErrorInThreadExiting);
90+
}
91+
92+
@Override
93+
protected boolean preserveClusterUponCompletion() {
94+
// as the cluster is dead its state can not be wiped successfully so we have to bypass wiping the cluster
95+
return true;
96+
}
97+
98+
}

test/framework/src/main/java/org/elasticsearch/bootstrap/BootstrapForTesting.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ public class BootstrapForTesting {
7878
}
7979

8080
// just like bootstrap, initialize natives, then SM
81-
Bootstrap.initializeNatives(javaTmpDir, true, true, true);
81+
final boolean systemCallFilter = Booleans.parseBoolean(System.getProperty("tests.system_call_filter", "true"));
82+
Bootstrap.initializeNatives(javaTmpDir, true, systemCallFilter, true);
8283

8384
// initialize probes
8485
Bootstrap.initializeProbes();

test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,11 @@ public void initClient() throws IOException {
145145
*/
146146
@After
147147
public final void cleanUpCluster() throws Exception {
148-
wipeCluster();
149-
waitForClusterStateUpdatesToFinish();
150-
logIfThereAreRunningTasks();
148+
if (preserveClusterUponCompletion() == false) {
149+
wipeCluster();
150+
waitForClusterStateUpdatesToFinish();
151+
logIfThereAreRunningTasks();
152+
}
151153
}
152154

153155
@AfterClass
@@ -175,6 +177,17 @@ protected static RestClient adminClient() {
175177
return adminClient;
176178
}
177179

180+
/**
181+
* Returns whether to preserve the state of the cluster upon completion of this test. Defaults to false. If true, overrides the value of
182+
* {@link #preserveIndicesUponCompletion()}, {@link #preserveTemplatesUponCompletion()}, {@link #preserveReposUponCompletion()}, and
183+
* {@link #preserveSnapshotsUponCompletion()}.
184+
*
* When this returns true, {@link #cleanUpCluster()} does not wipe the cluster, does not wait for cluster state updates to finish, and
* does not log running tasks.
*
185+
* @return true if the state of the cluster should be preserved
186+
*/
187+
protected boolean preserveClusterUponCompletion() {
188+
return false;
189+
}
190+
178191
/**
179192
* Returns whether to preserve the indices created during this test on completion of this test.
180193
* Defaults to {@code false}. Override this method if indices should be preserved after the test,

0 commit comments

Comments
 (0)