Skip to content

Commit

Permalink
Add elasticsearch-node detach-cluster command (#37979)
Browse files Browse the repository at this point in the history
This commit adds the second part of `elasticsearch-node` tool -
`detach-cluster` command in addition to `unsafe-bootstrap` command.
Also, this commit changes the semantics of `unsafe-bootstrap`, now
`unsafe-bootstrap` changes clusterUUID.
So the algorithm of running `elasticsearch-node` tool is the following:
1) Stop all nodes in the cluster.
2) Pick master-eligible node with the highest (term, version) pair and
run the `unsafe-bootstrap` command on it. If there are no survived
master-eligible nodes - skip this step.
3) Run `detach-cluster` command on the remaining survived nodes.

Detach cluster makes the following changes to the node metadata:
1) Sets clusterUUID committed to false.
2) Sets currentTerm and term to 0. 
3) Removes voting tombstones and sets voting configurations to special
constant MUST_JOIN_ELECTED_MASTER, that prevents initial cluster
bootstrap.

`ElasticsearchNodeCommand` base abstract class is introduced, because
`UnsafeBootstrapMasterCommand` and `DetachClusterCommand` have a lot in
common.
Also, this commit adds "ordinal" parameter to both commands, because it's 
impossible to write IT otherwise.
For MUST_JOIN_ELECTED_MASTER case special handling is introduced in
`ClusterFormationFailureHelper`.
Tests for both commands reside in `ElasticsearchNodeCommandIT` (renamed
from `UnsafeBootstrapMasterIT`).
  • Loading branch information
andrershov authored Feb 1, 2019
1 parent 979e557 commit bda5914
Show file tree
Hide file tree
Showing 12 changed files with 720 additions and 375 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,12 @@ String getDescription() {

assert clusterState.getLastCommittedConfiguration().isEmpty() == false;

if (clusterState.getLastCommittedConfiguration().equals(VotingConfiguration.MUST_JOIN_ELECTED_MASTER)) {
return String.format(Locale.ROOT,
"master not discovered yet and this node was detached from its previous cluster, have discovered %s; %s",
foundPeers, discoveryWillContinueDescription);
}

final String quorumDescription;
if (clusterState.getLastAcceptedConfiguration().equals(clusterState.getLastCommittedConfiguration())) {
quorumDescription = describeQuorum(clusterState.getLastAcceptedConfiguration());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,8 @@ public String toString() {
public static class VotingConfiguration implements Writeable, ToXContentFragment {

public static final VotingConfiguration EMPTY_CONFIG = new VotingConfiguration(Collections.emptySet());
public static final VotingConfiguration MUST_JOIN_ELECTED_MASTER = new VotingConfiguration(Collections.singleton(
"_must_join_elected_master_"));

private final Set<String> nodeIds;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.coordination;

import joptsimple.OptionSet;
import org.elasticsearch.cli.Terminal;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.env.Environment;

import java.io.IOException;
import java.nio.file.Path;

public class DetachClusterCommand extends ElasticsearchNodeCommand {

static final String NODE_DETACHED_MSG = "Node was successfully detached from the cluster";
static final String CONFIRMATION_MSG =
"-------------------------------------------------------------------------------\n" +
"\n" +
"You should run this tool only if you have permanently lost all\n" +
"your master-eligible nodes, and you cannot restore the cluster\n" +
"from a snapshot, or you have already run `elasticsearch-node unsafe-bootstrap`\n" +
"on a master-eligible node that formed a cluster with this node.\n" +
"This tool can cause arbitrary data loss and its use should be your last resort.\n" +
"Do you want to proceed?\n";

public DetachClusterCommand() {
super("Detaches this node from its cluster, allowing it to unsafely join a new cluster");
}

@Override
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
super.execute(terminal, options, env);

processNodePathsWithLock(terminal, options, env);

terminal.println(NODE_DETACHED_MSG);
}

@Override
protected void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException {
final Tuple<Manifest, MetaData> manifestMetaDataTuple = loadMetaData(terminal, dataPaths);
final Manifest manifest = manifestMetaDataTuple.v1();
final MetaData metaData = manifestMetaDataTuple.v2();

confirm(terminal, CONFIRMATION_MSG);

writeNewMetaData(terminal, manifest, updateCurrentTerm(), metaData, updateMetaData(metaData), dataPaths);
}

// package-private for tests
static MetaData updateMetaData(MetaData oldMetaData) {
final CoordinationMetaData coordinationMetaData = CoordinationMetaData.builder()
.lastAcceptedConfiguration(CoordinationMetaData.VotingConfiguration.MUST_JOIN_ELECTED_MASTER)
.lastCommittedConfiguration(CoordinationMetaData.VotingConfiguration.MUST_JOIN_ELECTED_MASTER)
.term(0)
.build();
return MetaData.builder(oldMetaData)
.coordinationMetaData(coordinationMetaData)
.clusterUUIDCommitted(false)
.build();
}

//package-private for tests
static long updateCurrentTerm() {
return 0;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.cluster.coordination;

import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.store.LockObtainFailedException;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cli.EnvironmentAwareCommand;
import org.elasticsearch.cli.Terminal;
import org.elasticsearch.cluster.ClusterModule;
import org.elasticsearch.cluster.metadata.Manifest;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.NodeEnvironment;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Objects;

public abstract class ElasticsearchNodeCommand extends EnvironmentAwareCommand {
private static final Logger logger = LogManager.getLogger(ElasticsearchNodeCommand.class);
protected final NamedXContentRegistry namedXContentRegistry;
static final String STOP_WARNING_MSG =
"--------------------------------------------------------------------------\n" +
"\n" +
" WARNING: Elasticsearch MUST be stopped before running this tool." +
"\n";
static final String FAILED_TO_OBTAIN_NODE_LOCK_MSG = "failed to lock node's directory, is Elasticsearch still running?";
static final String NO_NODE_FOLDER_FOUND_MSG = "no node folder is found in data folder(s), node has not been started yet?";
static final String NO_MANIFEST_FILE_FOUND_MSG = "no manifest file is found, do you run pre 7.0 Elasticsearch?";
static final String GLOBAL_GENERATION_MISSING_MSG = "no metadata is referenced from the manifest file, cluster has never been " +
"bootstrapped?";
static final String NO_GLOBAL_METADATA_MSG = "failed to find global metadata, metadata corrupted?";
static final String WRITE_METADATA_EXCEPTION_MSG = "exception occurred when writing new metadata to disk";
static final String ABORTED_BY_USER_MSG = "aborted by user";
final OptionSpec<Integer> nodeOrdinalOption;

public ElasticsearchNodeCommand(String description) {
super(description);
nodeOrdinalOption = parser.accepts("ordinal", "Optional node ordinal, 0 if not specified")
.withRequiredArg().ofType(Integer.class);
namedXContentRegistry = new NamedXContentRegistry(ClusterModule.getNamedXWriteables());
}

protected void processNodePathsWithLock(Terminal terminal, OptionSet options, Environment env) throws IOException {
terminal.println(Terminal.Verbosity.VERBOSE, "Obtaining lock for node");
Integer nodeOrdinal = nodeOrdinalOption.value(options);
if (nodeOrdinal == null) {
nodeOrdinal = 0;
}
try (NodeEnvironment.NodeLock lock = new NodeEnvironment.NodeLock(nodeOrdinal, logger, env, Files::exists)) {
final Path[] dataPaths =
Arrays.stream(lock.getNodePaths()).filter(Objects::nonNull).map(p -> p.path).toArray(Path[]::new);
if (dataPaths.length == 0) {
throw new ElasticsearchException(NO_NODE_FOLDER_FOUND_MSG);
}
processNodePaths(terminal, dataPaths);
} catch (LockObtainFailedException ex) {
throw new ElasticsearchException(
FAILED_TO_OBTAIN_NODE_LOCK_MSG + " [" + ex.getMessage() + "]");
}
}

protected Tuple<Manifest, MetaData> loadMetaData(Terminal terminal, Path[] dataPaths) throws IOException {
terminal.println(Terminal.Verbosity.VERBOSE, "Loading manifest file");
final Manifest manifest = Manifest.FORMAT.loadLatestState(logger, namedXContentRegistry, dataPaths);

if (manifest == null) {
throw new ElasticsearchException(NO_MANIFEST_FILE_FOUND_MSG);
}
if (manifest.isGlobalGenerationMissing()) {
throw new ElasticsearchException(GLOBAL_GENERATION_MISSING_MSG);
}
terminal.println(Terminal.Verbosity.VERBOSE, "Loading global metadata file");
final MetaData metaData = MetaData.FORMAT.loadGeneration(logger, namedXContentRegistry, manifest.getGlobalGeneration(),
dataPaths);
if (metaData == null) {
throw new ElasticsearchException(NO_GLOBAL_METADATA_MSG + " [generation = " + manifest.getGlobalGeneration() + "]");
}

return Tuple.tuple(manifest, metaData);
}

protected void confirm(Terminal terminal, String msg) {
terminal.println(msg);
String text = terminal.readText("Confirm [y/N] ");
if (text.equalsIgnoreCase("y") == false) {
throw new ElasticsearchException(ABORTED_BY_USER_MSG);
}
}

@Override
protected void execute(Terminal terminal, OptionSet options, Environment env) throws Exception {
terminal.println(STOP_WARNING_MSG);
}

protected abstract void processNodePaths(Terminal terminal, Path[] dataPaths) throws IOException;


protected void writeNewMetaData(Terminal terminal, Manifest oldManifest, long newCurrentTerm,
MetaData oldMetaData, MetaData newMetaData, Path[] dataPaths) {
try {
terminal.println(Terminal.Verbosity.VERBOSE,
"[clusterUUID = " + oldMetaData.clusterUUID() + ", committed = " + oldMetaData.clusterUUIDCommitted() + "] => " +
"[clusterUUID = " + newMetaData.clusterUUID() + ", committed = " + newMetaData.clusterUUIDCommitted() + "]");
terminal.println(Terminal.Verbosity.VERBOSE, "New coordination metadata is " + newMetaData.coordinationMetaData());
terminal.println(Terminal.Verbosity.VERBOSE, "Writing new global metadata to disk");
long newGeneration = MetaData.FORMAT.write(newMetaData, dataPaths);
Manifest newManifest = new Manifest(newCurrentTerm, oldManifest.getClusterStateVersion(), newGeneration,
oldManifest.getIndexGenerations());
terminal.println(Terminal.Verbosity.VERBOSE, "New manifest is " + newManifest);
terminal.println(Terminal.Verbosity.VERBOSE, "Writing new manifest file to disk");
Manifest.FORMAT.writeAndCleanup(newManifest, dataPaths);
terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up old metadata");
MetaData.FORMAT.cleanupOldFiles(newGeneration, dataPaths);
} catch (Exception e) {
terminal.println(Terminal.Verbosity.VERBOSE, "Cleaning up new metadata");
MetaData.FORMAT.cleanupOldFiles(oldManifest.getGlobalGeneration(), dataPaths);
throw new ElasticsearchException(WRITE_METADATA_EXCEPTION_MSG, e);
}
}

//package-private for testing
OptionParser getParser() {
return parser;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public NodeToolCli() {
super("A CLI tool to unsafely recover a cluster after the permanent loss of too many master-eligible nodes", ()->{});
CommandLoggingConfigurator.configureLoggingWithoutConfig();
subcommands.put("unsafe-bootstrap", new UnsafeBootstrapMasterCommand());
subcommands.put("detach-cluster", new DetachClusterCommand());
}

public static void main(String[] args) throws Exception {
Expand Down
Loading

0 comments on commit bda5914

Please sign in to comment.