diff --git a/cmd/traffic_ctl/server.cc b/cmd/traffic_ctl/server.cc index 8e77c2a6e39..d41064ffa61 100644 --- a/cmd/traffic_ctl/server.cc +++ b/cmd/traffic_ctl/server.cc @@ -169,16 +169,49 @@ server_start(unsigned argc, const char **argv) return CTRL_EX_OK; } +static int +server_drain(unsigned argc, const char **argv) +{ + TSMgmtError error; + const char *usage = "server drain [OPTIONS]"; + + int no_new_connection = 0; + int undo = 0; + const ArgumentDescription opts[] = { + {"no-new-connection", 'N', "Wait for new connections down to threshold before starting draining", "F", &no_new_connection, + nullptr, nullptr}, + {"undo", 'U', "Recover server from the drain mode", "F", &undo, nullptr, nullptr}, + }; + + if (!CtrlProcessArguments(argc, argv, opts, countof(opts)) || n_file_arguments != 0) { + return CtrlCommandUsage(usage, opts, countof(opts)); + } + + if (undo) { + error = TSDrain(TS_DRAIN_OPT_UNDO); + } else if (no_new_connection) { + error = TSDrain(TS_DRAIN_OPT_IDLE); + } else { + error = TSDrain(TS_DRAIN_OPT_NONE); + } + + if (error != TS_ERR_OKAY) { + CtrlMgmtError(error, "server drain failed"); + return CTRL_EX_ERROR; + } + + return CTRL_EX_OK; +} + int subcommand_server(unsigned argc, const char **argv) { - const subcommand commands[] = { - {server_backtrace, "backtrace", "Show a full stack trace of the traffic_server process"}, - {server_restart, "restart", "Restart Traffic Server"}, - {server_start, "start", "Start the proxy"}, - {server_status, "status", "Show the proxy status"}, - {server_stop, "stop", "Stop the proxy"}, - }; + const subcommand commands[] = {{server_backtrace, "backtrace", "Show a full stack trace of the traffic_server process"}, + {server_restart, "restart", "Restart Traffic Server"}, + {server_start, "start", "Start the proxy"}, + {server_status, "status", "Show the proxy status"}, + {server_stop, "stop", "Stop the proxy"}, + {server_drain, "drain", "Drain the requests"}}; return CtrlGenericSubcommand("server", commands, countof(commands), argc, argv); } diff --git a/cmd/traffic_manager/traffic_manager.cc b/cmd/traffic_manager/traffic_manager.cc index 0e11d4e2852..24df10160cd 100644 --- a/cmd/traffic_manager/traffic_manager.cc +++ b/cmd/traffic_manager/traffic_manager.cc @@ -166,6 +166,18 @@ is_server_idle() return active <= threshold; } +static bool +is_server_idle_from_new_connection() +{ + RecInt active = 0; + RecInt threshold = 0; + // TODO implement with the right metric + + Debug("lm", "%" PRId64 " active clients, threshold is %" PRId64, active, threshold); + + return active <= threshold; +} + static bool is_server_draining() { @@ -783,6 +795,23 @@ main(int argc, const char **argv) lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; } break; + case MGMT_PENDING_DRAIN: + if (!is_server_draining()) { + lmgmt->processDrain(); + } + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; + break; + case MGMT_PENDING_IDLE_DRAIN: + if (is_server_idle_from_new_connection()) { + lmgmt->processDrain(); + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; + } + break; + case MGMT_PENDING_UNDO_DRAIN: + if (is_server_draining()) { + lmgmt->processDrain(0); + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE; + } default: break; } diff --git a/mgmt/LocalManager.cc b/mgmt/LocalManager.cc index ac466d1e9da..a8e0a065e99 100644 --- a/mgmt/LocalManager.cc +++ b/mgmt/LocalManager.cc @@ -99,10 +99,10 @@ LocalManager::processBounce() } void -LocalManager::processDrain() +LocalManager::processDrain(int to_drain) { mgmt_log("[LocalManager::processDrain] Executing process drain request.\n"); - signalEvent(MGMT_EVENT_DRAIN, "processDrain"); + signalEvent(MGMT_EVENT_DRAIN, to_drain ? "1" : "0"); return; } diff --git a/mgmt/LocalManager.h b/mgmt/LocalManager.h index 87ed6e829ef..524b086a815 100644 --- a/mgmt/LocalManager.h +++ b/mgmt/LocalManager.h @@ -49,9 +49,12 @@ enum ManagementPendingOperation { MGMT_PENDING_RESTART, // Restart TS and TM MGMT_PENDING_BOUNCE, // Restart TS MGMT_PENDING_STOP, // Stop TS + MGMT_PENDING_DRAIN, // Drain TS MGMT_PENDING_IDLE_RESTART, // Restart TS and TM when TS is idle MGMT_PENDING_IDLE_BOUNCE, // Restart TS when TS is idle - MGMT_PENDING_IDLE_STOP // Stop TS when TS is idle + MGMT_PENDING_IDLE_STOP, // Stop TS when TS is idle + MGMT_PENDING_IDLE_DRAIN, // Drain TS when TS is idle from new connections + MGMT_PENDING_UNDO_DRAIN, // Recover TS from drain }; class LocalManager : public BaseManager @@ -85,7 +88,7 @@ class LocalManager : public BaseManager void processShutdown(bool mainThread = false); void processRestart(); void processBounce(); - void processDrain(); + void processDrain(int to_drain = 1); void rollLogFiles(); void clearStats(const char *name = NULL); @@ -99,6 +102,7 @@ class LocalManager : public BaseManager bool proxy_launch_outstanding = false; ManagementPendingOperation mgmt_shutdown_outstanding = MGMT_PENDING_NONE; time_t mgmt_shutdown_triggered_at; + time_t mgmt_drain_triggered_at; int proxy_running = 0; HttpProxyPort::Group m_proxy_ports; // Local inbound addresses to bind, if set. diff --git a/mgmt/ProcessManager.cc b/mgmt/ProcessManager.cc index db97f3f388e..7965e0dc1e0 100644 --- a/mgmt/ProcessManager.cc +++ b/mgmt/ProcessManager.cc @@ -382,7 +382,7 @@ ProcessManager::handleMgmtMsgFromLM(MgmtMessageHdr *mh) signalMgmtEntity(MGMT_EVENT_RESTART); break; case MGMT_EVENT_DRAIN: - signalMgmtEntity(MGMT_EVENT_DRAIN); + signalMgmtEntity(MGMT_EVENT_DRAIN, data_raw, mh->data_len); break; case MGMT_EVENT_CLEAR_STATS: signalMgmtEntity(MGMT_EVENT_CLEAR_STATS); diff --git a/mgmt/api/CoreAPI.cc b/mgmt/api/CoreAPI.cc index d407d6d5baf..8db92a8ca0d 100644 --- a/mgmt/api/CoreAPI.cc +++ b/mgmt/api/CoreAPI.cc @@ -431,11 +431,35 @@ TSMgmtError Stop(unsigned options) { lmgmt->mgmt_shutdown_triggered_at = time(nullptr); - lmgmt->mgmt_shutdown_outstanding = (options & TS_STOP_OPT_DRAIN) ? MGMT_PENDING_IDLE_STOP : MGMT_PENDING_STOP; + lmgmt->mgmt_shutdown_outstanding = (options & TS_STOP_OPT_DRAIN) ? MGMT_PENDING_IDLE_STOP : MGMT_PENDING_STOP; return TS_ERR_OKAY; } +/*------------------------------------------------------------------------- + * Drain + *------------------------------------------------------------------------- + * Drain requests of traffic_server + */ +TSMgmtError +Drain(unsigned options) +{ + switch (options) { + case TS_DRAIN_OPT_NONE: + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_DRAIN; + break; + case TS_DRAIN_OPT_IDLE: + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_IDLE_DRAIN; + break; + case TS_DRAIN_OPT_UNDO: + lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_UNDO_DRAIN; + break; + default: + ink_release_assert(!"Not expected to reach here"); + } + return TS_ERR_OKAY; +} + /*------------------------------------------------------------------------- * StorageDeviceCmdOffline *------------------------------------------------------------------------- diff --git a/mgmt/api/CoreAPI.h b/mgmt/api/CoreAPI.h index ede35d52a0f..04838da5809 100644 --- a/mgmt/api/CoreAPI.h +++ b/mgmt/api/CoreAPI.h @@ -47,6 +47,7 @@ TSMgmtError Reconfigure(); TSMgmtError Restart(unsigned options); // restart TM TSMgmtError Bounce(unsigned options); // restart traffic_server TSMgmtError Stop(unsigned options); // stop traffic_server +TSMgmtError Drain(unsigned options); // drain requests of traffic_server TSMgmtError StorageDeviceCmdOffline(const char *dev); // Storage device operation. TSMgmtError LifecycleMessage(const char *tag, void const *data, size_t data_size); // Lifecycle alert to plugins. diff --git a/mgmt/api/CoreAPIRemote.cc b/mgmt/api/CoreAPIRemote.cc index cf1f928b0ee..686df8426ef 100644 --- a/mgmt/api/CoreAPIRemote.cc +++ b/mgmt/api/CoreAPIRemote.cc @@ -449,6 +449,23 @@ Stop(unsigned options) return (ret == TS_ERR_OKAY) ? parse_generic_response(OpType::STOP, main_socket_fd) : ret; } +/*------------------------------------------------------------------------- + * Drain + *------------------------------------------------------------------------- + * Drain requests of the traffic_server process(es) only. + */ +TSMgmtError +Drain(unsigned options) +{ + TSMgmtError ret; + OpType optype = OpType::DRAIN; + MgmtMarshallInt oval = options; + + ret = MGMTAPI_SEND_MESSAGE(main_socket_fd, OpType::DRAIN, &optype, &oval); + + return (ret == TS_ERR_OKAY) ? parse_generic_response(OpType::DRAIN, main_socket_fd) : ret; +} + /*------------------------------------------------------------------------- * StorageDeviceCmdOffline *------------------------------------------------------------------------- diff --git a/mgmt/api/INKMgmtAPI.cc b/mgmt/api/INKMgmtAPI.cc index 0db45b31fd9..40500048bdf 100644 --- a/mgmt/api/INKMgmtAPI.cc +++ b/mgmt/api/INKMgmtAPI.cc @@ -1679,6 +1679,12 @@ TSStop(unsigned options) return Stop(options); } +tsapi TSMgmtError +TSDrain(unsigned options) +{ + return Drain(options); +} + tsapi TSMgmtError TSStorageDeviceCmdOffline(const char *dev) { diff --git a/mgmt/api/NetworkMessage.cc b/mgmt/api/NetworkMessage.cc index a7fc64950cf..a0b86d578a6 100644 --- a/mgmt/api/NetworkMessage.cc +++ b/mgmt/api/NetworkMessage.cc @@ -46,6 +46,8 @@ static const struct NetCmdOperation requests[] = { /* RECONFIGURE */ {1, {MGMT_MARSHALL_INT}}, /* RESTART */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}}, /* BOUNCE */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}}, + /* STOP */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}}, + /* DRAIN */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}}, /* EVENT_RESOLVE */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}}, /* EVENT_GET_MLT */ {1, {MGMT_MARSHALL_INT}}, /* EVENT_ACTIVE */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}}, @@ -73,6 +75,8 @@ static const struct NetCmdOperation responses[] = { /* RECONFIGURE */ {1, {MGMT_MARSHALL_INT}}, /* RESTART */ {1, {MGMT_MARSHALL_INT}}, /* BOUNCE */ {1, {MGMT_MARSHALL_INT}}, + /* STOP */ {1, {MGMT_MARSHALL_INT}}, + /* DRAIN */ {1, {MGMT_MARSHALL_INT}}, /* EVENT_RESOLVE */ {1, {MGMT_MARSHALL_INT}}, /* EVENT_GET_MLT */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_STRING}}, /* EVENT_ACTIVE */ {2, {MGMT_MARSHALL_INT, MGMT_MARSHALL_INT}}, @@ -193,6 +197,7 @@ send_mgmt_error(int fd, OpType optype, TSMgmtError error) switch (optype) { case OpType::BOUNCE: case OpType::STOP: + case OpType::DRAIN: case OpType::EVENT_RESOLVE: case OpType::LIFECYCLE_MESSAGE: case OpType::PROXY_STATE_SET: diff --git a/mgmt/api/NetworkMessage.h b/mgmt/api/NetworkMessage.h index 9e591a63dcb..daa75106e47 100644 --- a/mgmt/api/NetworkMessage.h +++ b/mgmt/api/NetworkMessage.h @@ -42,6 +42,7 @@ enum class OpType : MgmtMarshallInt { RESTART, BOUNCE, STOP, + DRAIN, EVENT_RESOLVE, EVENT_GET_MLT, EVENT_ACTIVE, diff --git a/mgmt/api/TSControlMain.cc b/mgmt/api/TSControlMain.cc index 5d0a3022ea3..29aa647fe93 100644 --- a/mgmt/api/TSControlMain.cc +++ b/mgmt/api/TSControlMain.cc @@ -660,6 +660,28 @@ handle_stop(int fd, void *req, size_t reqlen) return send_mgmt_response(fd, OpType::STOP, &err); } +/************************************************************************** + * handle_drain + * + * purpose: handles request to drain TS + * output: TS_ERR_xx + * note: None + *************************************************************************/ +static TSMgmtError +handle_drain(int fd, void *req, size_t reqlen) +{ + OpType optype; + MgmtMarshallInt options; + MgmtMarshallInt err; + + err = recv_mgmt_request(req, reqlen, OpType::DRAIN, &optype, &options); + if (err == TS_ERR_OKAY) { + err = Drain(options); + } + + return send_mgmt_response(fd, OpType::DRAIN, &err); +} + /************************************************************************** * handle_storage_device_cmd_offline * @@ -1019,6 +1041,7 @@ static const control_message_handler handlers[] = { /* RESTART */ {MGMT_API_PRIVILEGED, handle_restart}, /* BOUNCE */ {MGMT_API_PRIVILEGED, handle_restart}, /* STOP */ {MGMT_API_PRIVILEGED, handle_stop}, + /* DRAIN */ {MGMT_API_PRIVILEGED, handle_drain}, /* EVENT_RESOLVE */ {MGMT_API_PRIVILEGED, handle_event_resolve}, /* EVENT_GET_MLT */ {0, handle_event_get_mlt}, /* EVENT_ACTIVE */ {0, handle_event_active}, diff --git a/mgmt/api/include/mgmtapi.h b/mgmt/api/include/mgmtapi.h index 4567c89a3f8..d0646d95a7d 100644 --- a/mgmt/api/include/mgmtapi.h +++ b/mgmt/api/include/mgmtapi.h @@ -333,6 +333,12 @@ typedef enum { TS_STOP_OPT_DRAIN, /* Wait for traffic to drain before stopping. */ } TSStopOptionT; +typedef enum { + TS_DRAIN_OPT_NONE = 0x0, + TS_DRAIN_OPT_IDLE, /* Wait for idle from new connections before draining. */ + TS_DRAIN_OPT_UNDO, /* Recover TS from drain mode */ +} TSDrainOptionT; + /*************************************************************************** * Structures ***************************************************************************/ @@ -840,6 +846,12 @@ tsapi TSMgmtError TSBounce(unsigned options); */ tsapi TSMgmtError TSStop(unsigned options); +/* TSDrain: drain requests of the traffic_server process. + * Input: options - TSDrainOptionT + * Output TSMgmtError + */ +tsapi TSMgmtError TSDrain(unsigned options); + /* TSStorageDeviceCmdOffline: Request to make a cache storage device offline. * @arg dev Target device, specified by path to device. * @return Success. diff --git a/proxy/Main.cc b/proxy/Main.cc index 5ac3bbcfee9..7c6a617e185 100644 --- a/proxy/Main.cc +++ b/proxy/Main.cc @@ -1992,9 +1992,10 @@ mgmt_restart_shutdown_callback(void *, char *, int /* data_len ATS_UNUSED */) } static void * -mgmt_drain_callback(void *, char *, int /* data_len ATS_UNUSED */) +mgmt_drain_callback(void *, char *arg, int len) { - RecSetRecordInt("proxy.node.config.draining", 1, REC_SOURCE_DEFAULT); + ink_assert(len > 1 && (arg[0] == '0' || arg[0] == '1')); + RecSetRecordInt("proxy.node.config.draining", arg[0] == '1', REC_SOURCE_DEFAULT); return nullptr; } diff --git a/proxy/http2/Http2ClientSession.cc b/proxy/http2/Http2ClientSession.cc index b79ab444a13..651d165ad3b 100644 --- a/proxy/http2/Http2ClientSession.cc +++ b/proxy/http2/Http2ClientSession.cc @@ -343,6 +343,10 @@ Http2ClientSession::main_event_handler(int event, void *edata) break; } + if (!this->is_draining()) { + this->connection_state.set_shutdown_state(HTTP2_SHUTDOWN_NONE); + } + // For a case we already checked Connection header and it didn't exist if (this->is_draining() && this->connection_state.get_shutdown_state() == HTTP2_SHUTDOWN_NONE) { this->connection_state.set_shutdown_state(HTTP2_SHUTDOWN_NOT_INITIATED);