Skip to content

Commit b9eff70

Browse files
committed
graceful shutdown
1 parent 2e514e3 commit b9eff70

File tree

3 files changed

+31
-14
lines changed

3 files changed

+31
-14
lines changed

fe/fe-common/src/main/java/org/apache/doris/common/Config.java

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3620,12 +3620,6 @@ public static int metaServiceRpcRetryTimes() {
36203620
})
36213621
public static int first_error_msg_max_length = 256;
36223622

3623-
@ConfField(mutable = true, masterOnly = false, description = {
3624-
"如果设置为 false,则 health 接口将返回 503,表示此 FE 不应再接收服务请求",
3625-
"If set to false, the health endpoint will return 503, "
3626-
+ "indicating that this FE should no longer receive service requests"})
3627-
public static boolean frontend_service_available = true;
3628-
36293623
@ConfField
36303624
public static String cloud_snapshot_handler_class = "org.apache.doris.cloud.snapshot.CloudSnapshotHandler";
36313625
@ConfField

fe/fe-core/src/main/java/org/apache/doris/DorisFE.java

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import org.apache.doris.journal.bdbje.BDBTool;
3737
import org.apache.doris.journal.bdbje.BDBToolOptions;
3838
import org.apache.doris.persist.meta.MetaReader;
39+
import org.apache.doris.qe.Coordinator;
40+
import org.apache.doris.qe.QeProcessorImpl;
3941
import org.apache.doris.qe.QeService;
4042
import org.apache.doris.qe.SimpleScheduler;
4143
import org.apache.doris.service.ExecuteEnv;
@@ -63,6 +65,7 @@
6365
import java.nio.channels.OverlappingFileLockException;
6466
import java.nio.file.StandardOpenOption;
6567
import java.time.LocalDate;
68+
import java.util.List;
6669
import java.util.concurrent.TimeUnit;
6770
import java.util.concurrent.atomic.AtomicBoolean;
6871

@@ -76,15 +79,14 @@ public class DorisFE {
7679
public static final String DORIS_HOME_DIR = System.getenv("DORIS_HOME");
7780
public static final String PID_DIR = System.getenv("PID_DIR");
7881

79-
8082
private static String LOCK_FILE_PATH;
8183

8284
private static final String LOCK_FILE_NAME = "process.lock";
8385
private static FileChannel processLockFileChannel;
8486
private static FileLock processFileLock;
8587

8688
// set to true when all servers are ready.
87-
private static AtomicBoolean serverReady = new AtomicBoolean(false);
89+
private final static AtomicBoolean serverReady = new AtomicBoolean(false);
8890

8991
public static void main(String[] args) {
9092
// Every doris version should have a final meta version, it should not change
@@ -148,7 +150,14 @@ public static void start(String dorisHomeDir, String pidDir, String[] args, Star
148150
}
149151

150152
Log4jConfig.initLogging(dorisHomeDir + "/conf/");
151-
Runtime.getRuntime().addShutdownHook(new Thread(LogManager::shutdown));
153+
154+
// Add shutdown hook for graceful exit
155+
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
156+
LOG.info("Received shutdown signal, starting graceful shutdown...");
157+
serverReady.set(false);
158+
gracefulShutdown();
159+
LogManager.shutdown();
160+
}));
152161

153162
// set dns cache ttl
154163
java.security.Security.setProperty("networkaddress.cache.ttl", "60");
@@ -237,9 +246,10 @@ public static void start(String dorisHomeDir, String pidDir, String[] args, Star
237246
startMonitor();
238247

239248
serverReady.set(true);
240-
while (true) {
249+
while (serverReady.get()) {
241250
Thread.sleep(2000);
242251
}
252+
LOG.info("Doris FE main loop exited, shutting down gracefully...");
243253
} catch (Throwable e) {
244254
// Some exception may thrown before LOG is inited.
245255
// So need to print to stdout
@@ -589,4 +599,21 @@ public static class StartupOptions {
589599
public static boolean isServerReady() {
590600
return serverReady.get();
591601
}
602+
603+
private static void gracefulShutdown() {
604+
// wait for all queries to finish
605+
try {
606+
long now = System.currentTimeMillis();
607+
List<Coordinator> allCoordinators = QeProcessorImpl.INSTANCE.getAllCoordinators();
608+
while (!allCoordinators.isEmpty() && System.currentTimeMillis() - now < 300 * 1000L) {
609+
Thread.sleep(1000);
610+
allCoordinators = QeProcessorImpl.INSTANCE.getAllCoordinators();
611+
LOG.info("waiting {} queries to finish before shutdown", allCoordinators.size());
612+
}
613+
} catch (Throwable t) {
614+
LOG.error("", t);
615+
}
616+
617+
LOG.info("graceful shutdown finished");
618+
}
592619
}

fe/fe-core/src/main/java/org/apache/doris/httpv2/rest/HealthAction.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,6 @@ public Object execute(HttpServletRequest request, HttpServletResponse response)
4444
return ResponseEntityBuilder.serviceUnavailable("Server is not ready");
4545
}
4646

47-
if (!Config.frontend_service_available) {
48-
return ResponseEntityBuilder.serviceUnavailable("Server is not available");
49-
}
50-
5147
Map<String, Object> result = new HashMap<>();
5248
result.put("total_backend_num", Env.getCurrentSystemInfo().getAllBackendIds(false).size());
5349
result.put("online_backend_num", Env.getCurrentSystemInfo().getAllBackendIds(true).size());

0 commit comments

Comments
 (0)