From 8a1d9232d2caef8a5caae497027e0a31d6486a5a Mon Sep 17 00:00:00 2001
From: Sudheer Vinukonda
Date: Mon, 11 May 2020 10:05:11 -0700
Subject: [PATCH 1/5] Enforce Active Connection limits

1. Throttle connections when there's no room in the active connection queue
2. Adjust manage_active_queue() to not fail when the connection is already in the active queue
3. Return true for PluginVC (dummy connection) add_to_active_queue
4. Metrics for throttling
5. Metrics for timing out connections due to the default inactivity timeout
---
 .../statistics/core/network-io.en.rst | 10 ++++++++
 iocore/net/Net.cc                     |  8 +++++--
 iocore/net/P_Net.h                    |  4 +++-
 iocore/net/UnixNet.cc                 | 24 +++++++++++++++++--
 iocore/net/UnixNetVConnection.cc      |  4 ----
 proxy/PluginVC.cc                     |  2 +-
 proxy/http/Http1ClientSession.cc      |  7 +++++-
 proxy/http2/Http2ConnectionState.cc   |  8 ++++++-
 8 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/doc/admin-guide/monitoring/statistics/core/network-io.en.rst b/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
index 5393086495a..7fd89962735 100644
--- a/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
+++ b/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
@@ -60,7 +60,17 @@ Network I/O
 .. ts:stat:: global proxy.process.net.connections_currently_open integer
    :type: counter
 
+.. ts:stat:: global proxy.process.net.connections_throttled_in integer
+   :type: counter
+
+.. ts:stat:: global proxy.process.net.connections_throttled_out integer
+   :type: counter
+
+.. ts:stat:: global proxy.process.net.max.active.connections_throttled_in integer
+   :type: counter
+
 .. ts:stat:: global proxy.process.net.default_inactivity_timeout_applied integer
+.. ts:stat:: global proxy.process.net.default_inactivity_timeout_count integer
 .. ts:stat:: global proxy.process.net.dynamic_keep_alive_timeout_in_count integer
 .. ts:stat:: global proxy.process.net.dynamic_keep_alive_timeout_in_total integer
 .. ts:stat:: global proxy.process.net.inactivity_cop_lock_acquire_failure integer
diff --git a/iocore/net/Net.cc b/iocore/net/Net.cc
index f5bbb886af7..2ee21f9847d 100644
--- a/iocore/net/Net.cc
+++ b/iocore/net/Net.cc
@@ -102,7 +102,8 @@ register_net_stats()
   const std::pair non_persistent[] = {
     {"proxy.process.net.accepts_currently_open", net_accepts_currently_open_stat},
     {"proxy.process.net.connections_currently_open", net_connections_currently_open_stat},
-    {"proxy.process.net.default_inactivity_timeout_applied", default_inactivity_timeout_stat},
+    {"proxy.process.net.default_inactivity_timeout_applied", default_inactivity_timeout_applied_stat},
+    {"proxy.process.net.default_inactivity_timeout_count", default_inactivity_timeout_count_stat},
     {"proxy.process.net.dynamic_keep_alive_timeout_in_count", keep_alive_queue_timeout_count_stat},
     {"proxy.process.net.dynamic_keep_alive_timeout_in_total", keep_alive_queue_timeout_total_stat},
     {"proxy.process.socks.connections_currently_open", socks_connections_currently_open_stat},
@@ -130,7 +131,8 @@ register_net_stats()
   NET_CLEAR_DYN_STAT(socks_connections_currently_open_stat);
   NET_CLEAR_DYN_STAT(keep_alive_queue_timeout_total_stat);
   NET_CLEAR_DYN_STAT(keep_alive_queue_timeout_count_stat);
-  NET_CLEAR_DYN_STAT(default_inactivity_timeout_stat);
+  NET_CLEAR_DYN_STAT(default_inactivity_timeout_count_stat);
+  NET_CLEAR_DYN_STAT(default_inactivity_timeout_applied_stat);
 
   RecRegisterRawStat(net_rsb, RECT_PROCESS, "proxy.process.tcp.total_accepts", RECD_INT, RECP_NON_PERSISTENT,
                      static_cast(net_tcp_accept_stat), RecRawStatSyncSum);
@@ -140,6 +142,8 @@ register_net_stats()
                      (int)net_connections_throttled_in_stat, RecRawStatSyncSum);
   RecRegisterRawStat(net_rsb, RECT_PROCESS, "proxy.process.net.connections_throttled_out", RECD_INT, RECP_PERSISTENT,
                      (int)net_connections_throttled_out_stat, RecRawStatSyncSum);
+  RecRegisterRawStat(net_rsb, RECT_PROCESS, "proxy.process.net.max.active.connections_throttled_in", RECD_INT, RECP_PERSISTENT,
+                     (int)net_connections_max_active_throttled_in_stat, RecRawStatSyncSum);
 }
 
 void
diff --git a/iocore/net/P_Net.h b/iocore/net/P_Net.h
index 15a55bb8bb6..45996a60880 100644
--- a/iocore/net/P_Net.h
+++ b/iocore/net/P_Net.h
@@ -51,12 +51,14 @@ enum Net_Stats {
   inactivity_cop_lock_acquire_failure_stat,
   keep_alive_queue_timeout_total_stat,
   keep_alive_queue_timeout_count_stat,
-  default_inactivity_timeout_stat,
+  default_inactivity_timeout_applied_stat,
+  default_inactivity_timeout_count_stat,
   net_fastopen_attempts_stat,
   net_fastopen_successes_stat,
   net_tcp_accept_stat,
   net_connections_throttled_in_stat,
   net_connections_throttled_out_stat,
+  net_connections_max_active_throttled_in_stat,
   Net_Stat_Count
 };
 
diff --git a/iocore/net/UnixNet.cc b/iocore/net/UnixNet.cc
index 788c2ad6f07..bc9ebc9c8ed 100644
--- a/iocore/net/UnixNet.cc
+++ b/iocore/net/UnixNet.cc
@@ -69,7 +69,21 @@ class InactivityCop : public Continuation
         continue;
       }
 
+      // set a default inactivity timeout if one is not set
+      bool use_default_inactivity_timeout = false;
+      if (ne->next_inactivity_timeout_at == 0 && nh.config.default_inactivity_timeout > 0) {
+        Debug("inactivity_cop", "vc: %p inactivity timeout not set, setting a default of %d", ne,
+              nh.config.default_inactivity_timeout);
+        use_default_inactivity_timeout = true;
+        ne->set_inactivity_timeout(HRTIME_SECONDS(nh.config.default_inactivity_timeout));
+        NET_INCREMENT_DYN_STAT(default_inactivity_timeout_applied_stat);
+      }
+
       if (ne->next_inactivity_timeout_at && ne->next_inactivity_timeout_at < now) {
+        if (use_default_inactivity_timeout) {
+          // track the connections that timed out due to default inactivity
+          NET_INCREMENT_DYN_STAT(default_inactivity_timeout_count_stat);
+        }
         if (nh.keep_alive_queue.in(ne)) {
           // only stat if the connection is in keep-alive, there can be other inactivity timeouts
           ink_hrtime diff = (now - (ne->next_inactivity_timeout_at - ne->inactivity_timeout_in)) / HRTIME_SECOND;
@@ -721,16 +735,22 @@ NetHandler::add_to_active_queue(NetEvent *ne)
         max_connections_per_thread_in, active_queue_size, keep_alive_queue_size);
   ink_assert(mutex->thread_holding == this_ethread());
 
+  bool active_queue_full = false;
+
   // if active queue is over size then close inactive connections
   if (manage_active_queue() == false) {
-    // there is no room left in the queue
-    return false;
+    active_queue_full = true;
   }
 
   if (active_queue.in(ne)) {
     // already in the active queue, move the head
     active_queue.remove(ne);
   } else {
+    if (active_queue_full) {
+      // there is no room left in the queue
+      NET_SUM_DYN_STAT(net_connections_max_active_throttled_in_stat, 1);
+      return false;
+    }
     // in the keep-alive queue or no queue, new to this queue
     remove_from_keep_alive_queue(ne);
     ++active_queue_size;
diff --git a/iocore/net/UnixNetVConnection.cc b/iocore/net/UnixNetVConnection.cc
index b5d2b89f95a..f85eb0f84d5 100644
--- a/iocore/net/UnixNetVConnection.cc
+++ b/iocore/net/UnixNetVConnection.cc
@@ -1352,10 +1352,6 @@ TS_INLINE void
 UnixNetVConnection::set_inactivity_timeout(ink_hrtime timeout_in)
 {
   Debug("socket", "Set inactive timeout=%" PRId64 ", for NetVC=%p", timeout_in, this);
-  if (timeout_in == 0) {
-    // set default inactivity timeout
-    timeout_in = HRTIME_SECONDS(nh->config.default_inactivity_timeout);
-  }
   inactivity_timeout_in      = timeout_in;
   next_inactivity_timeout_at = Thread::get_hrtime() + inactivity_timeout_in;
 }
diff --git a/proxy/PluginVC.cc b/proxy/PluginVC.cc
index f6cc4270592..126b4f0655e 100644
--- a/proxy/PluginVC.cc
+++ b/proxy/PluginVC.cc
@@ -926,7 +926,7 @@ bool
 PluginVC::add_to_active_queue()
 {
   // do nothing
-  return false;
+  return true;
 }
 
 SOCKET
diff --git a/proxy/http/Http1ClientSession.cc b/proxy/http/Http1ClientSession.cc
index 8f825dc32b1..0c402bf589d 100644
--- a/proxy/http/Http1ClientSession.cc
+++ b/proxy/http/Http1ClientSession.cc
@@ -465,6 +465,12 @@ Http1ClientSession::new_transaction()
     return;
   }
 
+  if (!client_vc->add_to_active_queue()) {
+    // no room in the active queue, close the connection
+    this->do_io_close();
+    return;
+  }
+
   // Defensive programming, make sure nothing persists across
   // connection re-use
   half_close = false;
@@ -474,7 +480,6 @@ Http1ClientSession::new_transaction()
 
   trans.set_proxy_ssn(this);
   transact_count++;
-  client_vc->add_to_active_queue();
 
   trans.new_transaction(read_from_early_data > 0 ? true : false);
 }
diff --git a/proxy/http2/Http2ConnectionState.cc b/proxy/http2/Http2ConnectionState.cc
index 3711e5cb82f..453920a4bf7 100644
--- a/proxy/http2/Http2ConnectionState.cc
+++ b/proxy/http2/Http2ConnectionState.cc
@@ -1153,6 +1153,13 @@ Http2ConnectionState::state_closed(int event, void *edata)
 Http2Stream *
 Http2ConnectionState::create_stream(Http2StreamId new_id, Http2Error &error)
 {
+  // first check if we've hit the active connection limit
+  if (!ua_session->get_netvc()->add_to_active_queue()) {
+    error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_CONNECTION, Http2ErrorCode::HTTP2_ERROR_INTERNAL_ERROR,
+                       "refused to create new stream, maxed out active connections");
+    return nullptr;
+  }
+
   // In half_close state, TS doesn't create new stream. Because GOAWAY frame is sent to client
   if (ua_session->get_half_close_local_flag()) {
     error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_STREAM, Http2ErrorCode::HTTP2_ERROR_REFUSED_STREAM,
@@ -1226,7 +1233,6 @@ Http2ConnectionState::create_stream(Http2StreamId new_id, Http2Error &error)
   new_stream->mutex                     = new_ProxyMutex();
   new_stream->is_first_transaction_flag = get_stream_requests() == 0;
   increment_stream_requests();
-  ua_session->get_netvc()->add_to_active_queue();
 
   return new_stream;
 }

From 19ab77004dbdc27fce1e2c7c2cdd2c3bae3db7dd Mon Sep 17 00:00:00 2001
From: Sudheer Vinukonda
Date: Mon, 11 May 2020 11:29:46 -0700
Subject: [PATCH 2/5] Move the default inactivity timeout changes to a separate PR

---
 .../monitoring/statistics/core/network-io.en.rst |  1 -
 iocore/net/Net.cc                                |  6 ++----
 iocore/net/P_Net.h                               |  3 +--
 iocore/net/UnixNet.cc                            | 14 --------------
 iocore/net/UnixNetVConnection.cc                 |  4 ++++
 5 files changed, 7 insertions(+), 21 deletions(-)

diff --git a/doc/admin-guide/monitoring/statistics/core/network-io.en.rst b/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
index 7fd89962735..8b832531d20 100644
--- a/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
+++ b/doc/admin-guide/monitoring/statistics/core/network-io.en.rst
@@ -71,7 +71,6 @@ Network I/O
 
 .. ts:stat:: global proxy.process.net.default_inactivity_timeout_applied integer
-.. ts:stat:: global proxy.process.net.default_inactivity_timeout_count integer
 .. ts:stat:: global proxy.process.net.dynamic_keep_alive_timeout_in_count integer
 .. ts:stat:: global proxy.process.net.dynamic_keep_alive_timeout_in_total integer
 .. ts:stat:: global proxy.process.net.inactivity_cop_lock_acquire_failure integer
 .. ts:stat:: global proxy.process.net.net_handler_run integer
diff --git a/iocore/net/Net.cc b/iocore/net/Net.cc
index 2ee21f9847d..1813c0dc626 100644
--- a/iocore/net/Net.cc
+++ b/iocore/net/Net.cc
@@ -102,8 +102,7 @@ register_net_stats()
   const std::pair non_persistent[] = {
     {"proxy.process.net.accepts_currently_open", net_accepts_currently_open_stat},
     {"proxy.process.net.connections_currently_open", net_connections_currently_open_stat},
-    {"proxy.process.net.default_inactivity_timeout_applied", default_inactivity_timeout_applied_stat},
-    {"proxy.process.net.default_inactivity_timeout_count", default_inactivity_timeout_count_stat},
+    {"proxy.process.net.default_inactivity_timeout_applied", default_inactivity_timeout_stat},
     {"proxy.process.net.dynamic_keep_alive_timeout_in_count", keep_alive_queue_timeout_count_stat},
     {"proxy.process.net.dynamic_keep_alive_timeout_in_total", keep_alive_queue_timeout_total_stat},
     {"proxy.process.socks.connections_currently_open", socks_connections_currently_open_stat},
@@ -131,8 +130,7 @@ register_net_stats()
   NET_CLEAR_DYN_STAT(socks_connections_currently_open_stat);
   NET_CLEAR_DYN_STAT(keep_alive_queue_timeout_total_stat);
   NET_CLEAR_DYN_STAT(keep_alive_queue_timeout_count_stat);
-  NET_CLEAR_DYN_STAT(default_inactivity_timeout_count_stat);
-  NET_CLEAR_DYN_STAT(default_inactivity_timeout_applied_stat);
+  NET_CLEAR_DYN_STAT(default_inactivity_timeout_stat);
 
   RecRegisterRawStat(net_rsb, RECT_PROCESS, "proxy.process.tcp.total_accepts", RECD_INT, RECP_NON_PERSISTENT,
                      static_cast(net_tcp_accept_stat), RecRawStatSyncSum);
diff --git a/iocore/net/P_Net.h b/iocore/net/P_Net.h
index 45996a60880..9a749f9cce4 100644
--- a/iocore/net/P_Net.h
+++ b/iocore/net/P_Net.h
@@ -51,8 +51,7 @@ enum Net_Stats {
   inactivity_cop_lock_acquire_failure_stat,
   keep_alive_queue_timeout_total_stat,
   keep_alive_queue_timeout_count_stat,
-  default_inactivity_timeout_applied_stat,
-  default_inactivity_timeout_count_stat,
+  default_inactivity_timeout_stat,
   net_fastopen_attempts_stat,
   net_fastopen_successes_stat,
   net_tcp_accept_stat,
diff --git a/iocore/net/UnixNet.cc b/iocore/net/UnixNet.cc
index bc9ebc9c8ed..2395bf48a07 100644
--- a/iocore/net/UnixNet.cc
+++ b/iocore/net/UnixNet.cc
@@ -69,21 +69,7 @@ class InactivityCop : public Continuation
         continue;
       }
 
-      // set a default inactivity timeout if one is not set
-      bool use_default_inactivity_timeout = false;
-      if (ne->next_inactivity_timeout_at == 0 && nh.config.default_inactivity_timeout > 0) {
-        Debug("inactivity_cop", "vc: %p inactivity timeout not set, setting a default of %d", ne,
-              nh.config.default_inactivity_timeout);
-        use_default_inactivity_timeout = true;
-        ne->set_inactivity_timeout(HRTIME_SECONDS(nh.config.default_inactivity_timeout));
-        NET_INCREMENT_DYN_STAT(default_inactivity_timeout_applied_stat);
-      }
-
       if (ne->next_inactivity_timeout_at && ne->next_inactivity_timeout_at < now) {
-        if (use_default_inactivity_timeout) {
-          // track the connections that timed out due to default inactivity
-          NET_INCREMENT_DYN_STAT(default_inactivity_timeout_count_stat);
-        }
         if (nh.keep_alive_queue.in(ne)) {
           // only stat if the connection is in keep-alive, there can be other inactivity timeouts
           ink_hrtime diff = (now - (ne->next_inactivity_timeout_at - ne->inactivity_timeout_in)) / HRTIME_SECOND;
diff --git a/iocore/net/UnixNetVConnection.cc b/iocore/net/UnixNetVConnection.cc
index f85eb0f84d5..b5d2b89f95a 100644
--- a/iocore/net/UnixNetVConnection.cc
+++ b/iocore/net/UnixNetVConnection.cc
@@ -1352,6 +1352,10 @@ TS_INLINE void
 UnixNetVConnection::set_inactivity_timeout(ink_hrtime timeout_in)
 {
   Debug("socket", "Set inactive timeout=%" PRId64 ", for NetVC=%p", timeout_in, this);
+  if (timeout_in == 0) {
+    // set default inactivity timeout
+    timeout_in = HRTIME_SECONDS(nh->config.default_inactivity_timeout);
+  }
   inactivity_timeout_in      = timeout_in;
   next_inactivity_timeout_at = Thread::get_hrtime() + inactivity_timeout_in;
 }

From 326d3a62b964f11df6038273f205994829722e1e Mon Sep 17 00:00:00 2001
From: Sudheer Vinukonda
Date: Mon, 11 May 2020 21:05:13 -0700
Subject: [PATCH 3/5] Add docs for the max inbound total and active connections

---
 doc/admin-guide/files/records.config.en.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index 53bd09c8278..0f3fbd17d36 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -409,6 +409,20 @@ Network
    handled. This should be tuned according to your memory size, and expected
    work load. If this is set to 0, the throttling logic is disabled.
 
+.. ts:cv:: CONFIG proxy.config.net.max_connections_in INT 30000
+
+   The total number of client connections that :program:`traffic_server`
+   can handle simultaneously. This should be tuned according to your memory
+   size and expected work load (network, CPU, etc.). This limit includes
+   both keep-alive and active client connections held by
+   :program:`traffic_server` at any given instant.
+
+.. ts:cv:: CONFIG proxy.config.net.max_active_connections_in INT 10000
+
+   The total number of active client connections that the :program:`traffic_server`
+   can handle simultaneously. This should be tuned according to your memory size,
+   and expected work load (network, cpu etc).
+
 .. ts:cv:: CONFIG proxy.config.net.default_inactivity_timeout INT 86400
    :reloadable:
 

From f3c282d4315125bf5adf8795070255b0272bcee0 Mon Sep 17 00:00:00 2001
From: Sudheer Vinukonda
Date: Mon, 11 May 2020 21:12:47 -0700
Subject: [PATCH 4/5] Change the http/2 throttle status code to no error

---
 proxy/http2/Http2ConnectionState.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proxy/http2/Http2ConnectionState.cc b/proxy/http2/Http2ConnectionState.cc
index 453920a4bf7..bd4833c1530 100644
--- a/proxy/http2/Http2ConnectionState.cc
+++ b/proxy/http2/Http2ConnectionState.cc
@@ -1155,7 +1155,7 @@ Http2ConnectionState::create_stream(Http2StreamId new_id, Http2Error &error)
 {
   // first check if we've hit the active connection limit
   if (!ua_session->get_netvc()->add_to_active_queue()) {
-    error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_CONNECTION, Http2ErrorCode::HTTP2_ERROR_INTERNAL_ERROR,
+    error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_NONE, Http2ErrorCode::HTTP2_ERROR_NO_ERROR,
                        "refused to create new stream, maxed out active connections");
     return nullptr;
   }

From 4a5b06a73796eb6d8f191ede170d12acd95055b6 Mon Sep 17 00:00:00 2001
From: Sudheer Vinukonda
Date: Tue, 12 May 2020 06:02:20 -0700
Subject: [PATCH 5/5] Change back the h2 error class to Connection class

Allow disabling active connection tracking

Docs
---
 doc/admin-guide/files/records.config.en.rst | 8 +++++---
 iocore/net/UnixNet.cc                       | 5 +++++
 proxy/http2/Http2ConnectionState.cc         | 2 +-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/doc/admin-guide/files/records.config.en.rst b/doc/admin-guide/files/records.config.en.rst
index 0f3fbd17d36..899f7a5ad55 100644
--- a/doc/admin-guide/files/records.config.en.rst
+++ b/doc/admin-guide/files/records.config.en.rst
@@ -419,9 +419,11 @@ Network
 
 .. ts:cv:: CONFIG proxy.config.net.max_active_connections_in INT 10000
 
-   The total number of active client connections that the :program:`traffic_server`
-   can handle simultaneously. This should be tuned according to your memory size,
-   and expected work load (network, cpu etc).
+   The total number of active client connections that |TS| can handle
+   simultaneously. This should be tuned according to your memory size and expected
+   work load (network, CPU, etc.). If this is set to 0, active connection tracking
+   is disabled, active connections have no separate limit, and the total number of
+   connections is limited only by :ts:cv:`proxy.config.net.connections_throttle`.
 
 .. ts:cv:: CONFIG proxy.config.net.default_inactivity_timeout INT 86400
    :reloadable:
diff --git a/iocore/net/UnixNet.cc b/iocore/net/UnixNet.cc
index 2395bf48a07..3f8f9960ef4 100644
--- a/iocore/net/UnixNet.cc
+++ b/iocore/net/UnixNet.cc
@@ -562,6 +562,11 @@ NetHandler::manage_active_queue(bool ignore_queue_size = false)
         max_connections_per_thread_in, max_connections_active_per_thread_in, total_connections_in, active_queue_size,
         keep_alive_queue_size);
 
+  if (!max_connections_active_per_thread_in) {
+    // active connection tracking is disabled; the active queue has no limit
+    return true;
+  }
+
   if (ignore_queue_size == false && max_connections_active_per_thread_in > active_queue_size) {
     return true;
   }
diff --git a/proxy/http2/Http2ConnectionState.cc b/proxy/http2/Http2ConnectionState.cc
index bd4833c1530..8966c731941 100644
--- a/proxy/http2/Http2ConnectionState.cc
+++ b/proxy/http2/Http2ConnectionState.cc
@@ -1155,7 +1155,7 @@ Http2ConnectionState::create_stream(Http2StreamId new_id, Http2Error &error)
 {
   // first check if we've hit the active connection limit
   if (!ua_session->get_netvc()->add_to_active_queue()) {
-    error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_NONE, Http2ErrorCode::HTTP2_ERROR_NO_ERROR,
+    error = Http2Error(Http2ErrorClass::HTTP2_ERROR_CLASS_CONNECTION, Http2ErrorCode::HTTP2_ERROR_NO_ERROR,
                        "refused to create new stream, maxed out active connections");
     return nullptr;
   }
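
For anyone exercising this series locally, below is a minimal, illustrative records.config sketch combining the new limits with the pre-existing settings they interact with. The values are examples only, not tuning recommendations; proxy.config.net.connections_throttle and proxy.config.net.default_inactivity_timeout already exist upstream and are shown purely for context.

    # Pre-existing overall inbound connection throttle (shown for context)
    CONFIG proxy.config.net.connections_throttle INT 30000
    # Added by this series: total inbound (keep-alive + active) client connection limit
    CONFIG proxy.config.net.max_connections_in INT 30000
    # Added by this series: active inbound client connection limit;
    # 0 disables active connection tracking (see PATCH 5/5)
    CONFIG proxy.config.net.max_active_connections_in INT 10000
    # Pre-existing default inactivity timeout (shown for context)
    CONFIG proxy.config.net.default_inactivity_timeout INT 86400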