diff --git a/server/src/main/java/org/elasticsearch/monitor/fs/FsHealthService.java b/server/src/main/java/org/elasticsearch/monitor/fs/FsHealthService.java index aa91f550339ae..2f1d00fb98760 100644 --- a/server/src/main/java/org/elasticsearch/monitor/fs/FsHealthService.java +++ b/server/src/main/java/org/elasticsearch/monitor/fs/FsHealthService.java @@ -11,7 +11,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.elasticsearch.common.Nullable; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.settings.ClusterSettings; @@ -30,32 +29,26 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; -import java.util.Set; import java.util.function.LongSupplier; -import java.util.stream.Collectors; import static org.elasticsearch.monitor.StatusInfo.Status.HEALTHY; import static org.elasticsearch.monitor.StatusInfo.Status.UNHEALTHY; /** - * Runs periodically and attempts to create a temp file to see if the filesystem is writable. If not then it marks the - * path as unhealthy. + * Runs periodically and attempts to create a temp file to see if the filesystem is writable. If not then it marks the path as unhealthy. */ public class FsHealthService extends AbstractLifecycleComponent implements NodeHealthService { private static final Logger logger = LogManager.getLogger(FsHealthService.class); private final ThreadPool threadPool; + private volatile StatusInfo statusInfo = new StatusInfo(HEALTHY, "not started"); private volatile boolean enabled; - private volatile boolean brokenLock; private final TimeValue refreshInterval; private volatile TimeValue slowPathLoggingThreshold; private final NodeEnvironment nodeEnv; private final LongSupplier currentTimeMillisSupplier; private volatile Scheduler.Cancellable scheduledFuture; - @Nullable - private volatile Set unhealthyPaths; - public static final Setting ENABLED_SETTING = Setting.boolSetting("monitor.fs.health.enabled", true, Setting.Property.NodeScope, Setting.Property.Dynamic); public static final Setting REFRESH_INTERVAL_SETTING = @@ -79,8 +72,7 @@ public FsHealthService(Settings settings, ClusterSettings clusterSettings, Threa @Override protected void doStart() { - scheduledFuture = threadPool.scheduleWithFixedDelay(new FsHealthMonitor(), refreshInterval, - ThreadPool.Names.GENERIC); + scheduledFuture = threadPool.scheduleWithFixedDelay(new FsHealthMonitor(), refreshInterval, ThreadPool.Names.GENERIC); } @Override @@ -102,29 +94,20 @@ public void setSlowPathLoggingThreshold(TimeValue slowPathLoggingThreshold) { @Override public StatusInfo getHealth() { - StatusInfo statusInfo; - Set unhealthyPaths = this.unhealthyPaths; if (enabled == false) { - statusInfo = new StatusInfo(HEALTHY, "health check disabled"); - } else if (brokenLock) { - statusInfo = new StatusInfo(UNHEALTHY, "health check failed due to broken node lock"); - } else if (unhealthyPaths == null) { - statusInfo = new StatusInfo(HEALTHY, "health check passed"); - } else { - String info = "health check failed on [" + unhealthyPaths.stream() - .map(k -> k.toString()).collect(Collectors.joining(",")) + "]"; - statusInfo = new StatusInfo(UNHEALTHY, info); + return new StatusInfo(HEALTHY, "health check disabled"); } + return statusInfo; } - class FsHealthMonitor implements Runnable { + class FsHealthMonitor implements Runnable { static final String TEMP_FILE_NAME = ".es_temp_file"; - private byte[] byteToWrite; + private final byte[] bytesToWrite; FsHealthMonitor(){ - this.byteToWrite = UUIDs.randomBase64UUID().getBytes(StandardCharsets.UTF_8); + this.bytesToWrite = UUIDs.randomBase64UUID().getBytes(StandardCharsets.UTF_8); } @Override @@ -140,23 +123,22 @@ public void run() { } private void monitorFSHealth() { - Path path; + final Path path; try { path = nodeEnv.nodeDataPath(); } catch (IllegalStateException e) { + statusInfo = new StatusInfo(UNHEALTHY, "health check failed due to broken node lock"); logger.error("health check failed", e); - brokenLock = true; return; } - brokenLock = false; - long executionStartTime = currentTimeMillisSupplier.getAsLong(); + final long executionStartTime = currentTimeMillisSupplier.getAsLong(); try { if (Files.exists(path)) { - Path tempDataPath = path.resolve(TEMP_FILE_NAME); + final Path tempDataPath = path.resolve(TEMP_FILE_NAME); Files.deleteIfExists(tempDataPath); try (OutputStream os = Files.newOutputStream(tempDataPath, StandardOpenOption.CREATE_NEW)) { - os.write(byteToWrite); + os.write(bytesToWrite); IOUtils.fsync(tempDataPath, false); } Files.delete(tempDataPath); @@ -167,11 +149,12 @@ private void monitorFSHealth() { } } } catch (Exception ex) { + statusInfo = new StatusInfo(UNHEALTHY, "health check failed on [" + path + "]"); logger.error(new ParameterizedMessage("health check of [{}] failed", path), ex); - unhealthyPaths = Set.of(path); return; } - unhealthyPaths = null; + + statusInfo = new StatusInfo(HEALTHY, "health check passed"); } } }