Skip to content

Commit 0cfffd5

Browse files
author
Andrey Ershov
committed
Change meta data write failure semantics
Write should clearly report if storage is left in dirty state.
1 parent 1ec0c73 commit 0cfffd5

File tree

9 files changed

+250
-75
lines changed

9 files changed

+250
-75
lines changed

server/src/main/java/org/elasticsearch/env/NodeEnvironment.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
import org.elasticsearch.common.unit.TimeValue;
5252
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
5353
import org.elasticsearch.gateway.MetaDataStateFormat;
54+
import org.elasticsearch.gateway.WriteStateException;
5455
import org.elasticsearch.index.Index;
5556
import org.elasticsearch.index.IndexSettings;
5657
import org.elasticsearch.index.shard.ShardId;
@@ -390,7 +391,11 @@ private static NodeMetaData loadOrCreateNodeMetaData(Settings settings, Logger l
390391
metaData = new NodeMetaData(generateNodeId(settings));
391392
}
392393
// we write again to make sure all paths have the latest state file
393-
NodeMetaData.FORMAT.write(metaData, paths);
394+
try {
395+
NodeMetaData.FORMAT.write(metaData, paths);
396+
} catch (WriteStateException e) {
397+
throw new IOException(e);
398+
}
394399
return metaData;
395400
}
396401

server/src/main/java/org/elasticsearch/gateway/MetaDataStateFormat.java

Lines changed: 123 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.store.IndexOutput;
3131
import org.apache.lucene.store.SimpleFSDirectory;
3232
import org.elasticsearch.ExceptionsHelper;
33+
import org.elasticsearch.common.collect.Tuple;
3334
import org.elasticsearch.common.logging.Loggers;
3435
import org.elasticsearch.common.lucene.store.IndexOutputOutputStream;
3536
import org.elasticsearch.common.lucene.store.InputStreamIndexInput;
@@ -91,88 +92,162 @@ private static void deleteFileIfExists(Path stateLocation, Directory directory,
9192
logger.trace("cleaned up {}", stateLocation.resolve(fileName));
9293
}
9394

94-
private void writeStateToFirstLocation(final T state, Path stateLocation, Directory stateDir, String fileName, String tmpFileName)
95-
throws IOException {
95+
private static void deleteFileIgnoreExceptions(Path stateLocation, Directory directory, String fileName) {
9696
try {
97-
deleteFileIfExists(stateLocation, stateDir, tmpFileName);
98-
try (IndexOutput out = stateDir.createOutput(tmpFileName, IOContext.DEFAULT)) {
99-
CodecUtil.writeHeader(out, STATE_FILE_CODEC, STATE_FILE_VERSION);
100-
out.writeInt(FORMAT.index());
101-
try (XContentBuilder builder = newXContentBuilder(FORMAT, new IndexOutputOutputStream(out) {
102-
@Override
103-
public void close() throws IOException {
104-
// this is important since some of the XContentBuilders write bytes on close.
105-
// in order to write the footer we need to prevent closing the actual index input.
106-
}
107-
})) {
97+
deleteFileIfExists(stateLocation, directory, fileName);
98+
} catch (IOException e) {
99+
logger.trace("clean up failed {}", stateLocation.resolve(fileName));
100+
}
101+
}
102+
103+
private static void performDirectoryCleanup(Path stateLocation, Directory stateDir, String tmpFileName) {
104+
deleteFileIgnoreExceptions(stateLocation, stateDir, tmpFileName);
105+
IOUtils.closeWhileHandlingException(stateDir);
106+
}
108107

109-
builder.startObject();
110-
{
111-
toXContent(builder, state);
108+
private Directory writeStateToFirstLocation(final T state, Path stateLocation, String tmpFileName)
109+
throws WriteStateException {
110+
try {
111+
Directory stateDir = newDirectory(stateLocation);
112+
try {
113+
deleteFileIfExists(stateLocation, stateDir, tmpFileName);
114+
try (IndexOutput out = stateDir.createOutput(tmpFileName, IOContext.DEFAULT)) {
115+
CodecUtil.writeHeader(out, STATE_FILE_CODEC, STATE_FILE_VERSION);
116+
out.writeInt(FORMAT.index());
117+
try (XContentBuilder builder = newXContentBuilder(FORMAT, new IndexOutputOutputStream(out) {
118+
@Override
119+
public void close() throws IOException {
120+
// this is important since some of the XContentBuilders write bytes on close.
121+
// in order to write the footer we need to prevent closing the actual index input.
122+
}
123+
})) {
124+
125+
builder.startObject();
126+
{
127+
toXContent(builder, state);
128+
}
129+
builder.endObject();
112130
}
113-
builder.endObject();
131+
CodecUtil.writeFooter(out);
114132
}
115-
CodecUtil.writeFooter(out);
116-
} catch (IllegalStateException e) {
117-
throw new IOException(e);
118-
}
119133

120-
stateDir.sync(Collections.singleton(tmpFileName));
121-
stateDir.rename(tmpFileName, fileName);
122-
stateDir.syncMetaData();
123-
logger.trace("written state to {}", stateLocation.resolve(fileName));
124-
} finally {
125-
deleteFileIfExists(stateLocation, stateDir, tmpFileName);
134+
stateDir.sync(Collections.singleton(tmpFileName));
135+
} catch (Exception e) {
136+
// perform clean up only in case of exception, we need to keep directory open and temporary file on disk
137+
// if everything is ok for the next algorithm steps
138+
performDirectoryCleanup(stateLocation, stateDir, tmpFileName);
139+
throw e;
140+
}
141+
return stateDir;
142+
} catch (Exception e) {
143+
throw new WriteStateException(false, "failed to write state to the first location tmp file", e);
126144
}
127145
}
128146

129-
private void copyStateToExtraLocation(Directory srcStateDir, Path extraStateLocation, String fileName, String tmpFileName)
130-
throws IOException {
131-
try (Directory extraStateDir = newDirectory(extraStateLocation)) {
147+
private Directory copyStateToExtraLocation(Directory srcStateDir, Path extraStateLocation, String tmpFileName)
148+
throws WriteStateException {
149+
try {
150+
Directory extraStateDir = newDirectory(extraStateLocation);
132151
try {
133152
deleteFileIfExists(extraStateLocation, extraStateDir, tmpFileName);
134-
extraStateDir.copyFrom(srcStateDir, fileName, tmpFileName, IOContext.DEFAULT);
153+
extraStateDir.copyFrom(srcStateDir, tmpFileName, tmpFileName, IOContext.DEFAULT);
135154
extraStateDir.sync(Collections.singleton(tmpFileName));
136-
extraStateDir.rename(tmpFileName, fileName);
137-
extraStateDir.syncMetaData();
138-
logger.trace("copied state to {}", extraStateLocation.resolve(fileName));
139-
} finally {
140-
deleteFileIfExists(extraStateLocation, extraStateDir, tmpFileName);
155+
} catch (Exception e) {
156+
// perform clean up only in case of exception, we need to keep directory open and temporary file on disk
157+
// if everything is ok for the next algorithm steps
158+
performDirectoryCleanup(extraStateLocation, extraStateDir, tmpFileName);
159+
throw e;
141160
}
161+
return extraStateDir;
162+
} catch (Exception e) {
163+
throw new WriteStateException(false, "failed to copy tmp state file to extra location", e);
142164
}
143165
}
144166

167+
public void performRenames(String tmpFileName, String fileName, final List<Tuple<Path, Directory>> stateDirectories) throws
168+
WriteStateException {
169+
Directory firstStateDirectory = stateDirectories.get(0).v2();
170+
try {
171+
firstStateDirectory.rename(tmpFileName, fileName);
172+
} catch (IOException e) {
173+
throw new WriteStateException(false, "failed to rename tmp file to final name in the first state location", e);
174+
}
175+
176+
for (int i = 1; i < stateDirectories.size(); i++) {
177+
Directory extraStateDirectory = stateDirectories.get(i).v2();
178+
try {
179+
extraStateDirectory.rename(tmpFileName, fileName);
180+
} catch (IOException e) {
181+
throw new WriteStateException(true, "failed to rename tmp file to final name in extra state location",
182+
e);
183+
}
184+
}
185+
}
186+
187+
public void performStateDirectoriesFsync(List<Tuple<Path, Directory>> stateDirectories) throws WriteStateException {
188+
for (int i = 0; i < stateDirectories.size(); i++) {
189+
try {
190+
stateDirectories.get(i).v2().syncMetaData();
191+
} catch (IOException e) {
192+
throw new WriteStateException(true, "meta data directory fsync has failed", e);
193+
}
194+
}
195+
}
196+
197+
145198
/**
146199
* Writes the given state to the given directories. The state is written to a
147200
* state directory ({@value #STATE_DIR_NAME}) underneath each of the given file locations and is created if it
148201
* doesn't exist. The state is serialized to a temporary file in that directory and is then atomically moved to
149202
* it's target filename of the pattern {@code {prefix}{version}.st}.
150-
* If this method returns without exception there is a guarantee that state is persisted to the disk and loadLatestState will return it.
151-
* But if this method throws an exception, loadLatestState could return this state or some previous state.
203+
* If this method returns without exception there is a guarantee that state is persisted to the disk and loadLatestState will return
204+
* it. <br>
205+
* This method may throw an {@link WriteStateException} if some exception during writing state occurs. <br>
206+
* If {@link WriteStateException#isDirty()} returns false, there is a guarantee that loadLatestState will return old state. <br>
207+
* If {@link WriteStateException#isDirty()} returns true, loadLatestState could return new state or previous state.
152208
*
153209
* @param state the state object to write
154210
* @param locations the locations where the state should be written to.
155-
* @throws IOException if an IOException occurs
211+
* @throws WriteStateException if some exception during writing state occurs.
156212
*/
157-
public final void write(final T state, final Path... locations) throws IOException {
213+
214+
public final void write(final T state, final Path... locations) throws WriteStateException {
158215
if (locations == null) {
159216
throw new IllegalArgumentException("Locations must not be null");
160217
}
161218
if (locations.length <= 0) {
162219
throw new IllegalArgumentException("One or more locations required");
163220
}
164-
final long maxStateId = findMaxStateId(prefix, locations) + 1;
221+
222+
long maxStateId;
223+
try {
224+
maxStateId = findMaxStateId(prefix, locations) + 1;
225+
} catch (Exception e) {
226+
throw new WriteStateException(false, "exception during looking up max state id", e);
227+
}
165228
assert maxStateId >= 0 : "maxStateId must be positive but was: [" + maxStateId + "]";
166229

167230
final String fileName = prefix + maxStateId + STATE_FILE_EXTENSION;
168231
final String tmpFileName = fileName + ".tmp";
169232
final Path firstStateLocation = locations[0].resolve(STATE_DIR_NAME);
170-
try (Directory stateDir = newDirectory(firstStateLocation)) {
171-
writeStateToFirstLocation(state, firstStateLocation, stateDir, fileName, tmpFileName);
233+
List<Tuple<Path, Directory>> directories = new ArrayList<>();
172234

235+
try {
236+
Directory firstStateDir = writeStateToFirstLocation(state, firstStateLocation, tmpFileName);
237+
directories.add(new Tuple<>(firstStateLocation, firstStateDir));
173238
for (int i = 1; i < locations.length; i++) {
174239
final Path extraStateLocation = locations[i].resolve(STATE_DIR_NAME);
175-
copyStateToExtraLocation(stateDir, extraStateLocation, fileName, tmpFileName);
240+
Directory extraStateDir = copyStateToExtraLocation(firstStateDir, extraStateLocation, tmpFileName);
241+
directories.add(new Tuple<>(extraStateLocation, extraStateDir));
242+
}
243+
performRenames(tmpFileName, fileName, directories);
244+
performStateDirectoriesFsync(directories);
245+
} finally {
246+
//writeStateToFirstLocation and copyStateToExtraLocation perform clean up for themselves if they fail
247+
//we need to perform clean up for all data paths that were successfully opened and temporary file was created
248+
for (int i = 0; i < directories.size(); i++) {
249+
Tuple<Path, Directory> pathAndDirectory = directories.get(i);
250+
performDirectoryCleanup(pathAndDirectory.v1(), pathAndDirectory.v2(), tmpFileName);
176251
}
177252
}
178253

@@ -229,16 +304,18 @@ protected Directory newDirectory(Path dir) throws IOException {
229304
return new SimpleFSDirectory(dir);
230305
}
231306

232-
private void cleanupOldFiles(final String currentStateFile, Path[] locations) throws IOException {
307+
private void cleanupOldFiles(final String currentStateFile, Path[] locations) {
233308
for (Path location : locations) {
234309
logger.trace("cleanupOldFiles: cleaning up {}", location);
235310
Path stateLocation = location.resolve(STATE_DIR_NAME);
236311
try (Directory stateDir = newDirectory(stateLocation)) {
237312
for (String file : stateDir.listAll()) {
238313
if (file.startsWith(prefix) && file.equals(currentStateFile) == false) {
239-
deleteFileIfExists(stateLocation, stateDir, file);
314+
deleteFileIgnoreExceptions(stateLocation, stateDir, file);
240315
}
241316
}
317+
} catch (Exception e) {
318+
logger.trace("clean up failed for state location {}", stateLocation);
242319
}
243320
}
244321
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Licensed to Elasticsearch under one or more contributor
3+
* license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright
5+
* ownership. Elasticsearch licenses this file to you under
6+
* the Apache License, Version 2.0 (the "License"); you may
7+
* not use this file except in compliance with the License.
8+
* You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
package org.elasticsearch.gateway;
20+
21+
/**
22+
* This exception is thrown when there is a problem of writing state to disk. <br>
23+
* If {@link #isDirty()} returns false, state is guaranteed to be not written to disk.
24+
* If {@link #isDirty()} returns true, we don't know if state is written to disk.
25+
*/
26+
public class WriteStateException extends Exception {
27+
private boolean dirty;
28+
29+
public WriteStateException(boolean dirty, String message, Exception cause) {
30+
super(message, cause);
31+
this.dirty = dirty;
32+
}
33+
34+
public boolean isDirty() {
35+
return dirty;
36+
}
37+
}

server/src/main/java/org/elasticsearch/index/shard/IndexShard.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
import org.elasticsearch.common.util.concurrent.AsyncIOProcessor;
6565
import org.elasticsearch.common.xcontent.XContentHelper;
6666
import org.elasticsearch.core.internal.io.IOUtils;
67+
import org.elasticsearch.gateway.WriteStateException;
6768
import org.elasticsearch.index.Index;
6869
import org.elasticsearch.index.IndexModule;
6970
import org.elasticsearch.index.IndexNotFoundException;
@@ -2243,7 +2244,11 @@ private static void persistMetadata(
22432244
logger.trace("{} writing shard state, reason [{}]", shardId, writeReason);
22442245
final ShardStateMetaData newShardStateMetadata =
22452246
new ShardStateMetaData(newRouting.primary(), indexSettings.getUUID(), newRouting.allocationId());
2246-
ShardStateMetaData.FORMAT.write(newShardStateMetadata, shardPath.getShardStatePath());
2247+
try {
2248+
ShardStateMetaData.FORMAT.write(newShardStateMetadata, shardPath.getShardStatePath());
2249+
} catch (WriteStateException e) {
2250+
throw new IOException(e);
2251+
}
22472252
} else {
22482253
logger.trace("{} skip writing shard state, has been written before", shardId);
22492254
}

server/src/main/java/org/elasticsearch/index/shard/RemoveCorruptedShardDataCommand.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import org.elasticsearch.env.NodeEnvironment;
5454
import org.elasticsearch.env.NodeMetaData;
5555
import org.elasticsearch.gateway.MetaDataStateFormat;
56+
import org.elasticsearch.gateway.WriteStateException;
5657
import org.elasticsearch.index.Index;
5758
import org.elasticsearch.index.IndexSettings;
5859
import org.elasticsearch.index.engine.Engine;
@@ -460,8 +461,11 @@ protected void newAllocationId(Environment environment, ShardPath shardPath, Ter
460461
final ShardStateMetaData newShardStateMetaData =
461462
new ShardStateMetaData(shardStateMetaData.primary, shardStateMetaData.indexUUID, newAllocationId);
462463

463-
ShardStateMetaData.FORMAT.write(newShardStateMetaData, shardStatePath);
464-
464+
try {
465+
ShardStateMetaData.FORMAT.write(newShardStateMetaData, shardStatePath);
466+
} catch (WriteStateException e) {
467+
throw new IOException(e);
468+
}
465469
terminal.println("");
466470
terminal.println("You should run the following command to allocate this shard:");
467471

0 commit comments

Comments
 (0)