Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 54 additions & 14 deletions cmd/traffic_ctl/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -115,16 +115,23 @@ static int
server_stop(unsigned argc, const char **argv)
{
TSMgmtError error;
const char *usage = "server stop [OPTIONS]";
unsigned flags = TS_RESTART_OPT_NONE;

// I am not sure whether it really makes sense to add the --drain option here.
// TSProxyStateSet() is a synchronous API, returning only after the proxy has
// been shut down. However, draining can take a long time and we don't want
// to wait for it. Maybe the right approach is to make the stop async.
if (!CtrlProcessArguments(argc, argv, nullptr, 0) || n_file_arguments != 0) {
return CtrlCommandUsage("server stop");
const ArgumentDescription opts[] = {
{"drain", '-', "Wait for client connections to drain before stopping", "F", &drain, nullptr, nullptr},
};

if (!CtrlProcessArguments(argc, argv, opts, countof(opts)) || n_file_arguments != 0) {
return CtrlCommandUsage(usage, opts, countof(opts));
}

error = TSProxyStateSet(TS_PROXY_OFF, TS_CACHE_CLEAR_NONE);
if (drain) {
flags |= TS_STOP_OPT_DRAIN;
}

error = TSStop(flags);

if (error != TS_ERR_OKAY) {
CtrlMgmtError(error, "server stop failed");
return CTRL_EX_ERROR;
Expand Down Expand Up @@ -162,16 +169,49 @@ server_start(unsigned argc, const char **argv)
return CTRL_EX_OK;
}

static int
server_drain(unsigned argc, const char **argv)
{
TSMgmtError error;
const char *usage = "server drain [OPTIONS]";

int no_new_connection = 0;
int undo = 0;
const ArgumentDescription opts[] = {
{"no-new-connection", 'N', "Wait for new connections down to threshold before starting draining", "F", &no_new_connection,
nullptr, nullptr},
{"undo", 'U', "Recover server from the drain mode", "F", &undo, nullptr, nullptr},
};

if (!CtrlProcessArguments(argc, argv, opts, countof(opts)) || n_file_arguments != 0) {
return CtrlCommandUsage(usage, opts, countof(opts));
}

if (undo) {
error = TSDrain(TS_DRAIN_OPT_UNDO);
} else if (no_new_connection) {
error = TSDrain(TS_DRAIN_OPT_IDLE);
} else {
error = TSDrain(TS_DRAIN_OPT_NONE);
}

if (error != TS_ERR_OKAY) {
CtrlMgmtError(error, "server drain failed");
return CTRL_EX_ERROR;
}

return CTRL_EX_OK;
}

// Dispatch `traffic_ctl server <subcommand>` to the matching handler.
//
// The table below is the single list of supported subcommands; it is handed
// to CtrlGenericSubcommand() together with the remaining command line.
int
subcommand_server(unsigned argc, const char **argv)
{
  // Only one definition of `commands` may exist in this scope. The older table
  // (without "drain") was a leftover and would redeclare the same name.
  const subcommand commands[] = {
    {server_backtrace, "backtrace", "Show a full stack trace of the traffic_server process"},
    {server_restart, "restart", "Restart Traffic Server"},
    {server_start, "start", "Start the proxy"},
    {server_status, "status", "Show the proxy status"},
    {server_stop, "stop", "Stop the proxy"},
    {server_drain, "drain", "Drain the requests"},
  };

  return CtrlGenericSubcommand("server", commands, countof(commands), argc, argv);
}
76 changes: 74 additions & 2 deletions cmd/traffic_manager/traffic_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,39 @@ is_server_idle()
return active <= threshold;
}

static bool
is_server_idle_from_new_connection()
{
RecInt active = 0;
RecInt threshold = 0;
// TODO implement with the right metric

Debug("lm", "%" PRId64 " active clients, threshold is %" PRId64, active, threshold);

return active <= threshold;
}

static bool
is_server_draining()
{
RecInt draining = 0;
if (RecGetRecordInt("proxy.node.config.draining", &draining) != REC_ERR_OKAY) {
return false;
}
return draining != 0;
}

// Report whether the grace period of a pending "drain, then shut down"
// operation has run out.
//
// The grace period is proxy.config.stop.shutdown_timeout seconds, counted from
// lmgmt->mgmt_shutdown_triggered_at. Returns false when the record cannot be
// read, or when the timeout is not positive (treated as "no timeout"), so that
// callers keep waiting for the server to become idle in those cases.
static bool
waited_enough()
{
  RecInt timeout = 0;
  if (RecGetRecordInt("proxy.config.stop.shutdown_timeout", &timeout) != REC_ERR_OKAY) {
    return false;
  }

  // Without a positive timeout there is no deadline to enforce; otherwise the
  // check below would be true immediately and draining would never be given
  // a chance to complete.
  if (timeout <= 0) {
    return false;
  }

  // The deadline has passed once "now" has reached trigger time + timeout.
  // (The comparison used to be reversed, which reported "waited enough" while
  // the grace period was still running and never after it had expired.)
  return lmgmt->mgmt_shutdown_triggered_at + timeout <= time(nullptr);
}

static void
check_lockfile()
{
Expand Down Expand Up @@ -682,6 +715,8 @@ main(int argc, const char **argv)
RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.manager", 0, RECP_NON_PERSISTENT);
RecRegisterStatInt(RECT_NODE, "proxy.node.config.restart_required.cop", 0, RECP_NON_PERSISTENT);

RecRegisterStatInt(RECT_NODE, "proxy.node.config.draining", 0, RECP_NON_PERSISTENT);

binding = new BindingInstance;
metrics_binding_initialize(*binding);
metrics_binding_configure(*binding);
Expand Down Expand Up @@ -727,7 +762,10 @@ main(int argc, const char **argv)
::exit(0);
break;
case MGMT_PENDING_IDLE_RESTART:
if (is_server_idle()) {
if (!is_server_draining()) {
lmgmt->processDrain();
}
if (is_server_idle() || waited_enough()) {
lmgmt->mgmtShutdown();
::exit(0);
}
Expand All @@ -737,11 +775,45 @@ main(int argc, const char **argv)
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
break;
case MGMT_PENDING_IDLE_BOUNCE:
if (is_server_idle()) {
if (!is_server_draining()) {
lmgmt->processDrain();
}
if (is_server_idle() || waited_enough()) {
lmgmt->processBounce();
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
}
break;
case MGMT_PENDING_STOP:
lmgmt->processShutdown();
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
break;
case MGMT_PENDING_IDLE_STOP:
if (!is_server_draining()) {
lmgmt->processDrain();
}
if (is_server_idle() || waited_enough()) {
lmgmt->processShutdown();
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
}
break;
case MGMT_PENDING_DRAIN:
if (!is_server_draining()) {
lmgmt->processDrain();
}
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
break;
case MGMT_PENDING_IDLE_DRAIN:
if (is_server_idle_from_new_connection()) {
lmgmt->processDrain();
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
}
break;
case MGMT_PENDING_UNDO_DRAIN:
if (is_server_draining()) {
lmgmt->processDrain(0);
lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
}
break;
default:
break;
}
Expand Down
1 change: 1 addition & 0 deletions mgmt/BaseManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
// case statement.
#define MGMT_EVENT_STORAGE_DEVICE_CMD_OFFLINE 10011
#define MGMT_EVENT_LIFECYCLE_MESSAGE 10012
#define MGMT_EVENT_DRAIN 10013

/***********************************************************************
*
Expand Down
8 changes: 8 additions & 0 deletions mgmt/LocalManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ LocalManager::processBounce()
return;
}

// Log the drain request and signal MGMT_EVENT_DRAIN.
//
// The event payload is the string "1" when to_drain is non-zero and "0"
// otherwise.
void
LocalManager::processDrain(int to_drain)
{
  const char *payload = "0";
  if (to_drain) {
    payload = "1";
  }

  mgmt_log("[LocalManager::processDrain] Executing process drain request.\n");
  signalEvent(MGMT_EVENT_DRAIN, payload);
}

void
LocalManager::rollLogFiles()
{
Expand Down
12 changes: 10 additions & 2 deletions mgmt/LocalManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,13 @@ enum ManagementPendingOperation {
MGMT_PENDING_NONE, // Do nothing
MGMT_PENDING_RESTART, // Restart TS and TM
MGMT_PENDING_BOUNCE, // Restart TS
MGMT_PENDING_STOP, // Stop TS
MGMT_PENDING_DRAIN, // Drain TS
MGMT_PENDING_IDLE_RESTART, // Restart TS and TM when TS is idle
MGMT_PENDING_IDLE_BOUNCE // Restart TS when TS is idle
MGMT_PENDING_IDLE_BOUNCE, // Restart TS when TS is idle
MGMT_PENDING_IDLE_STOP, // Stop TS when TS is idle
MGMT_PENDING_IDLE_DRAIN, // Drain TS when TS is idle from new connections
MGMT_PENDING_UNDO_DRAIN, // Recover TS from drain
};

class LocalManager : public BaseManager
Expand Down Expand Up @@ -83,6 +88,7 @@ class LocalManager : public BaseManager
void processShutdown(bool mainThread = false);
void processRestart();
void processBounce();
void processDrain(int to_drain = 1);
void rollLogFiles();
void clearStats(const char *name = NULL);

Expand All @@ -95,7 +101,9 @@ class LocalManager : public BaseManager
int proxy_launch_count = 0;
bool proxy_launch_outstanding = false;
ManagementPendingOperation mgmt_shutdown_outstanding = MGMT_PENDING_NONE;
int proxy_running = 0;
time_t mgmt_shutdown_triggered_at;
time_t mgmt_drain_triggered_at;
int proxy_running = 0;
HttpProxyPort::Group m_proxy_ports;
// Local inbound addresses to bind, if set.
IpAddr m_inbound_ip4;
Expand Down
3 changes: 3 additions & 0 deletions mgmt/ProcessManager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,9 @@ ProcessManager::handleMgmtMsgFromLM(MgmtMessageHdr *mh)
case MGMT_EVENT_RESTART:
signalMgmtEntity(MGMT_EVENT_RESTART);
break;
case MGMT_EVENT_DRAIN:
signalMgmtEntity(MGMT_EVENT_DRAIN, data_raw, mh->data_len);
break;
case MGMT_EVENT_CLEAR_STATS:
signalMgmtEntity(MGMT_EVENT_CLEAR_STATS);
break;
Expand Down
44 changes: 42 additions & 2 deletions mgmt/api/CoreAPI.cc
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,8 @@ Reconfigure()
// Schedule a restart by setting the pending operation on lmgmt.
//
// With TS_RESTART_OPT_DRAIN set in options the pending operation is
// MGMT_PENDING_IDLE_RESTART, otherwise MGMT_PENDING_RESTART.
// Always returns TS_ERR_OKAY.
TSMgmtError
Restart(unsigned options)
{
  // Record the trigger time first, and set the pending operation exactly once,
  // so the timestamp is already in place when the pending operation is seen.
  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);
  lmgmt->mgmt_shutdown_outstanding  = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_RESTART : MGMT_PENDING_RESTART;

  return TS_ERR_OKAY;
}
Expand All @@ -416,11 +417,50 @@ Restart(unsigned options)
// Schedule a bounce by setting the pending operation on lmgmt.
//
// With TS_RESTART_OPT_DRAIN set in options the pending operation is
// MGMT_PENDING_IDLE_BOUNCE, otherwise MGMT_PENDING_BOUNCE.
// Always returns TS_ERR_OKAY.
TSMgmtError
Bounce(unsigned options)
{
  // Record the trigger time first, and set the pending operation exactly once,
  // so the timestamp is already in place when the pending operation is seen.
  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);
  lmgmt->mgmt_shutdown_outstanding  = (options & TS_RESTART_OPT_DRAIN) ? MGMT_PENDING_IDLE_BOUNCE : MGMT_PENDING_BOUNCE;

  return TS_ERR_OKAY;
}

/*-------------------------------------------------------------------------
 * Stop
 *-------------------------------------------------------------------------
 * Schedule a stop of the traffic_server process(es).
 *
 * Records the request time in lmgmt->mgmt_shutdown_triggered_at and sets the
 * pending operation to MGMT_PENDING_IDLE_STOP when TS_STOP_OPT_DRAIN is set
 * in options, or to MGMT_PENDING_STOP otherwise. Always returns TS_ERR_OKAY.
 */
TSMgmtError
Stop(unsigned options)
{
  const bool drain_first = (options & TS_STOP_OPT_DRAIN) != 0;

  lmgmt->mgmt_shutdown_triggered_at = time(nullptr);

  if (drain_first) {
    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_IDLE_STOP;
  } else {
    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_STOP;
  }

  return TS_ERR_OKAY;
}

/*-------------------------------------------------------------------------
 * Drain
 *-------------------------------------------------------------------------
 * Schedule a drain-related operation for traffic_server.
 *
 * options must be exactly one of:
 *   TS_DRAIN_OPT_NONE -> MGMT_PENDING_DRAIN
 *   TS_DRAIN_OPT_IDLE -> MGMT_PENDING_IDLE_DRAIN
 *   TS_DRAIN_OPT_UNDO -> MGMT_PENDING_UNDO_DRAIN
 * Any other value trips a release assert. Returns TS_ERR_OKAY.
 */
TSMgmtError
Drain(unsigned options)
{
  if (options == TS_DRAIN_OPT_NONE) {
    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_DRAIN;
  } else if (options == TS_DRAIN_OPT_IDLE) {
    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_IDLE_DRAIN;
  } else if (options == TS_DRAIN_OPT_UNDO) {
    lmgmt->mgmt_shutdown_outstanding = MGMT_PENDING_UNDO_DRAIN;
  } else {
    ink_release_assert(!"Not expected to reach here");
  }

  return TS_ERR_OKAY;
}

/*-------------------------------------------------------------------------
* StorageDeviceCmdOffline
*-------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions mgmt/api/CoreAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ TSMgmtError ServerBacktrace(unsigned options, char **trace);
TSMgmtError Reconfigure(); // TS reread config files
TSMgmtError Restart(unsigned options); // restart TM
TSMgmtError Bounce(unsigned options); // restart traffic_server
TSMgmtError Stop(unsigned options); // stop traffic_server
TSMgmtError Drain(unsigned options); // drain requests of traffic_server
TSMgmtError StorageDeviceCmdOffline(const char *dev); // Storage device operation.
TSMgmtError LifecycleMessage(const char *tag, void const *data, size_t data_size); // Lifecycle alert to plugins.

Expand Down
34 changes: 34 additions & 0 deletions mgmt/api/CoreAPIRemote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -433,6 +433,40 @@ Bounce(unsigned options)
return (ret == TS_ERR_OKAY) ? parse_generic_response(OpType::BOUNCE, main_socket_fd) : ret;
}

/*-------------------------------------------------------------------------
 * Stop
 *-------------------------------------------------------------------------
 * Send an OpType::STOP request, carrying the given options, over
 * main_socket_fd and return the parsed generic response.
 *
 * If sending fails, the send error is returned and no response is read.
 */
TSMgmtError
Stop(unsigned options)
{
  OpType optype         = OpType::STOP;
  MgmtMarshallInt flags = options;

  const TSMgmtError sent = MGMTAPI_SEND_MESSAGE(main_socket_fd, OpType::STOP, &optype, &flags);
  if (sent != TS_ERR_OKAY) {
    return sent;
  }

  return parse_generic_response(OpType::STOP, main_socket_fd);
}

/*-------------------------------------------------------------------------
 * Drain
 *-------------------------------------------------------------------------
 * Send an OpType::DRAIN request, carrying the given options, over
 * main_socket_fd and return the parsed generic response.
 *
 * If sending fails, the send error is returned and no response is read.
 */
TSMgmtError
Drain(unsigned options)
{
  OpType optype         = OpType::DRAIN;
  MgmtMarshallInt flags = options;

  const TSMgmtError sent = MGMTAPI_SEND_MESSAGE(main_socket_fd, OpType::DRAIN, &optype, &flags);
  if (sent != TS_ERR_OKAY) {
    return sent;
  }

  return parse_generic_response(OpType::DRAIN, main_socket_fd);
}

/*-------------------------------------------------------------------------
* StorageDeviceCmdOffline
*-------------------------------------------------------------------------
Expand Down
12 changes: 12 additions & 0 deletions mgmt/api/INKMgmtAPI.cc
Original file line number Diff line number Diff line change
Expand Up @@ -812,6 +812,18 @@ TSBounce(unsigned options)
return Bounce(options);
}

// Public API entry point for stopping traffic_server: forwards options to
// Stop() and returns its result unchanged.
tsapi TSMgmtError
TSStop(unsigned options)
{
  const TSMgmtError result = Stop(options);
  return result;
}

// Public API entry point for draining traffic_server: forwards options to
// Drain() and returns its result unchanged.
tsapi TSMgmtError
TSDrain(unsigned options)
{
  const TSMgmtError result = Drain(options);
  return result;
}

tsapi TSMgmtError
TSStorageDeviceCmdOffline(const char *dev)
{
Expand Down
Loading