Skip to content

Commit

Permalink
Clean up FsHealthService after MDP removal (#73136)
Browse files Browse the repository at this point in the history
Following #72432 we now no longer need a `Set` of unhealthy paths, we
can just track the individual path directly.
  • Loading branch information
DaveCTurner authored May 17, 2021
1 parent 9061e59 commit 4268c76
Showing 1 changed file with 16 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.component.AbstractLifecycleComponent;
import org.elasticsearch.common.settings.ClusterSettings;
Expand All @@ -30,32 +29,26 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Set;
import java.util.function.LongSupplier;
import java.util.stream.Collectors;

import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY;
import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY;

/**
* Runs periodically and attempts to create a temp file to see if the filesystem is writable. If not then it marks the
* path as unhealthy.
* Runs periodically and attempts to create a temp file to see if the filesystem is writable. If not then it marks the path as unhealthy.
*/
public class FsHealthService extends AbstractLifecycleComponent implements NodeHealthService {

private static final Logger logger = LogManager.getLogger(FsHealthService.class);
private final ThreadPool threadPool;
private volatile StatusInfo statusInfo = new StatusInfo(HEALTHY, "not started");
private volatile boolean enabled;
private volatile boolean brokenLock;
private final TimeValue refreshInterval;
private volatile TimeValue slowPathLoggingThreshold;
private final NodeEnvironment nodeEnv;
private final LongSupplier currentTimeMillisSupplier;
private volatile Scheduler.Cancellable scheduledFuture;

@Nullable
private volatile Set<Path> unhealthyPaths;

public static final Setting<Boolean> ENABLED_SETTING =
Setting.boolSetting("monitor.fs.health.enabled", true, Setting.Property.NodeScope, Setting.Property.Dynamic);
public static final Setting<TimeValue> REFRESH_INTERVAL_SETTING =
Expand All @@ -79,8 +72,7 @@ public FsHealthService(Settings settings, ClusterSettings clusterSettings, Threa

@Override
protected void doStart() {
scheduledFuture = threadPool.scheduleWithFixedDelay(new FsHealthMonitor(), refreshInterval,
ThreadPool.Names.GENERIC);
scheduledFuture = threadPool.scheduleWithFixedDelay(new FsHealthMonitor(), refreshInterval, ThreadPool.Names.GENERIC);
}

@Override
Expand All @@ -102,29 +94,20 @@ public void setSlowPathLoggingThreshold(TimeValue slowPathLoggingThreshold) {

@Override
public StatusInfo getHealth() {
StatusInfo statusInfo;
Set<Path> unhealthyPaths = this.unhealthyPaths;
if (enabled == false) {
statusInfo = new StatusInfo(HEALTHY, "health check disabled");
} else if (brokenLock) {
statusInfo = new StatusInfo(UNHEALTHY, "health check failed due to broken node lock");
} else if (unhealthyPaths == null) {
statusInfo = new StatusInfo(HEALTHY, "health check passed");
} else {
String info = "health check failed on [" + unhealthyPaths.stream()
.map(k -> k.toString()).collect(Collectors.joining(",")) + "]";
statusInfo = new StatusInfo(UNHEALTHY, info);
return new StatusInfo(HEALTHY, "health check disabled");
}

return statusInfo;
}

class FsHealthMonitor implements Runnable {
class FsHealthMonitor implements Runnable {

static final String TEMP_FILE_NAME = ".es_temp_file";
private byte[] byteToWrite;
private final byte[] bytesToWrite;

FsHealthMonitor(){
this.byteToWrite = UUIDs.randomBase64UUID().getBytes(StandardCharsets.UTF_8);
this.bytesToWrite = UUIDs.randomBase64UUID().getBytes(StandardCharsets.UTF_8);
}

@Override
Expand All @@ -140,23 +123,22 @@ public void run() {
}

private void monitorFSHealth() {
Path path;
final Path path;
try {
path = nodeEnv.nodeDataPath();
} catch (IllegalStateException e) {
statusInfo = new StatusInfo(UNHEALTHY, "health check failed due to broken node lock");
logger.error("health check failed", e);
brokenLock = true;
return;
}

brokenLock = false;
long executionStartTime = currentTimeMillisSupplier.getAsLong();
final long executionStartTime = currentTimeMillisSupplier.getAsLong();
try {
if (Files.exists(path)) {
Path tempDataPath = path.resolve(TEMP_FILE_NAME);
final Path tempDataPath = path.resolve(TEMP_FILE_NAME);
Files.deleteIfExists(tempDataPath);
try (OutputStream os = Files.newOutputStream(tempDataPath, StandardOpenOption.CREATE_NEW)) {
os.write(byteToWrite);
os.write(bytesToWrite);
IOUtils.fsync(tempDataPath, false);
}
Files.delete(tempDataPath);
Expand All @@ -167,11 +149,12 @@ private void monitorFSHealth() {
}
}
} catch (Exception ex) {
statusInfo = new StatusInfo(UNHEALTHY, "health check failed on [" + path + "]");
logger.error(new ParameterizedMessage("health check of [{}] failed", path), ex);
unhealthyPaths = Set.of(path);
return;
}
unhealthyPaths = null;

statusInfo = new StatusInfo(HEALTHY, "health check passed");
}
}
}
Expand Down

0 comments on commit 4268c76

Please sign in to comment.