From 00cbfac3c827dae9221d3213cfead84f27608a3f Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Thu, 8 Feb 2018 08:53:40 +0100 Subject: [PATCH 1/4] Merge #12366: http: Join worker threads before deleting work queue 11e0151 http: Remove numThreads and ThreadCounter (Wladimir J. van der Laan) f946654 http: Remove WaitExit from WorkQueue (Wladimir J. van der Laan) b1c2370 http: Join worker threads before deleting work queue (Wladimir J. van der Laan) Pull request description: This prevents a potential race condition if control flow ends up in `ShutdownHTTPServer` before the thread gets to `queue->Run()`, deleting the work queue while workers are still going to use it. Meant to fix #12362. Tree-SHA512: 8108514aeee5b2067a3736ed028014b580d1cbf8530ac7682b8a23070133dfa1ca21db4358c9158ea57e8811e0551395b6cb769887876b9cfce067ee968d0642 --- src/httpserver.cpp | 48 ++++++++-------------------------------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/src/httpserver.cpp b/src/httpserver.cpp index 15f29beeac46..c10a70ca51a3 100644 --- a/src/httpserver.cpp +++ b/src/httpserver.cpp @@ -73,34 +73,13 @@ class WorkQueue std::deque> queue; bool running; size_t maxDepth; - int numThreads; - - /** RAII object to keep track of number of running worker threads */ - class ThreadCounter - { - public: - WorkQueue &wq; - ThreadCounter(WorkQueue &w): wq(w) - { - std::lock_guard lock(wq.cs); - wq.numThreads += 1; - } - ~ThreadCounter() - { - std::lock_guard lock(wq.cs); - wq.numThreads -= 1; - wq.cond.notify_all(); - } - }; public: WorkQueue(size_t _maxDepth) : running(true), - maxDepth(_maxDepth), - numThreads(0) + maxDepth(_maxDepth) { } - /** Precondition: worker threads have all stopped - * (call WaitExit) + /** Precondition: worker threads have all stopped (they have been joined). */ ~WorkQueue() { @@ -119,7 +98,6 @@ class WorkQueue /** Thread function */ void Run() { - ThreadCounter count(*this); while (true) { std::unique_ptr i; { @@ -141,14 +119,6 @@ class WorkQueue running = false; cond.notify_all(); } - /** Wait for worker threads to exit */ - void WaitExit() - { - std::unique_lock lock(cs); - while (numThreads > 0){ - cond.wait(lock); - } - } }; struct HTTPPathHandler @@ -451,6 +421,7 @@ bool UpdateHTTPServerLogging(bool enable) { std::thread threadHTTP; std::future threadResult; +static std::vector g_thread_http_workers; bool StartHTTPServer() { @@ -462,8 +433,7 @@ bool StartHTTPServer() threadHTTP = std::thread(std::move(task), eventBase, eventHTTP); for (int i = 0; i < rpcThreads; i++) { - std::thread rpc_worker(HTTPWorkQueueRun, workQueue); - rpc_worker.detach(); + g_thread_http_workers.emplace_back(HTTPWorkQueueRun, workQueue); } return true; } @@ -488,12 +458,10 @@ void StopHTTPServer() LogPrint(BCLog::HTTP, "Stopping HTTP server\n"); if (workQueue) { LogPrint(BCLog::HTTP, "Waiting for HTTP worker threads to exit\n"); -#ifndef WIN32 - // ToDo: Disabling WaitExit() for Windows platforms is an ugly workaround for the wallet not - // closing during a repair-restart. It doesn't hurt, though, because threadHTTP.timed_join - // below takes care of this and sends a loopbreak. - workQueue->WaitExit(); -#endif + for (auto& thread: g_thread_http_workers) { + thread.join(); + } + g_thread_http_workers.clear(); delete workQueue; workQueue = nullptr; } From a8e49d33e0f9c53cf25418da7705e2bf23c29b56 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Wed, 18 Oct 2017 16:06:07 +0200 Subject: [PATCH 2/4] Merge #11006: Improve shutdown process MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 793667a Improve shutdown process (João Barbosa) Pull request description: Improve the shutdown time by not having to wait up to 2 seconds. Here is a comparison running `wallet.py` function tests before this PR: ``` 2017-08-08 03:25:20.881000 TestFramework (INFO): Initializing test directory /var/folders/1v/8_69hby54nj2k3n6fywt44x80000gn/T/testq_ramjjr 2017-08-08 03:25:23.853000 TestFramework (INFO): Mining blocks... 2017-08-08 03:25:24.132000 TestFramework (INFO): test getmemoryinfo 2017-08-08 03:25:24.559000 TestFramework (INFO): test gettxout 2017-08-08 03:25:59.858000 TestFramework (INFO): check -rescan 2017-08-08 03:26:07.735000 TestFramework (INFO): check -reindex 2017-08-08 03:26:15.751000 TestFramework (INFO): check -zapwallettxes=1 2017-08-08 03:26:24.105000 TestFramework (INFO): check -zapwallettxes=2 2017-08-08 03:26:36.694000 TestFramework (INFO): Stopping nodes 2017-08-08 03:26:43.599000 TestFramework (INFO): Cleaning up 2017-08-08 03:26:43.612000 TestFramework (INFO): Tests successful ``` After: ``` 2017-08-08 03:24:04.319000 TestFramework (INFO): Initializing test directory /var/folders/1v/8_69hby54nj2k3n6fywt44x80000gn/T/testoqeyi50_ 2017-08-08 03:24:07.035000 TestFramework (INFO): Mining blocks... 2017-08-08 03:24:07.317000 TestFramework (INFO): test getmemoryinfo 2017-08-08 03:24:07.763000 TestFramework (INFO): test gettxout 2017-08-08 03:24:25.715000 TestFramework (INFO): check -rescan 2017-08-08 03:24:27.792000 TestFramework (INFO): check -reindex 2017-08-08 03:24:29.797000 TestFramework (INFO): check -zapwallettxes=1 2017-08-08 03:24:32.207000 TestFramework (INFO): check -zapwallettxes=2 2017-08-08 03:24:36.812000 TestFramework (INFO): Stopping nodes 2017-08-08 03:24:37.915000 TestFramework (INFO): Cleaning up 2017-08-08 03:24:37.927000 TestFramework (INFO): Tests successful ``` This largely improves the time spent in Travis (under evaluation). Tree-SHA512: 023012fb3f8a380addf5995a4bf865862fed712cdd1a648d82a710e6566bc3bd34b6c49f9f06d6cc6bd81ca859da50d30d7f786c816e702549ab642e3476426f --- src/httpserver.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/httpserver.cpp b/src/httpserver.cpp index c10a70ca51a3..902cf88ca23b 100644 --- a/src/httpserver.cpp +++ b/src/httpserver.cpp @@ -467,6 +467,8 @@ void StopHTTPServer() } if (eventBase) { LogPrint(BCLog::HTTP, "Waiting for HTTP event thread to exit\n"); + // Exit the event loop as soon as there are no active events. + event_base_loopexit(eventBase, nullptr); // Give event loop a few seconds to exit (to send back last RPC responses), then break it // Before this was solved with event_base_loopexit, but that didn't work as expected in // at least libevent 2.0.21 and always introduced a delay. In libevent From fefe07003b0171bd2e78a1662363cb8fa627fe77 Mon Sep 17 00:00:00 2001 From: "Wladimir J. van der Laan" Date: Thu, 6 Dec 2018 17:42:52 +0100 Subject: [PATCH 3/4] Merge #14670: http: Fix HTTP server shutdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 28479f926f21f2a91bec5a06671c60e5b0c55532 qa: Test bitcond shutdown (João Barbosa) 8d3f46ec3938e2ba17654fecacd1d2629f9915fd http: Remove timeout to exit event loop (João Barbosa) e98a9eede2fb48ff33a020acc888cbcd83e24bbf http: Remove unnecessary event_base_loopexit call (João Barbosa) 6b13580f4e3842c11abd9b8bee7255fb2472b6fe http: Unlisten sockets after all workers quit (João Barbosa) 18e968581697078c36a3c3818f8906cf134ccadd http: Send "Connection: close" header if shutdown is requested (João Barbosa) 02e1e4eff6cda0bfc24b455a7c1583394cbff6eb rpc: Add wait argument to stop (João Barbosa) Pull request description: Fixes #11777. Reverts #11006. Replaces #13501. With this change the HTTP server will exit gracefully, meaning that all requests will finish processing and sending the response, even if this means to wait more than 2 seconds (current time allowed to exit the event loop). Another small change is that connections are accepted even when the server is stopping, but HTTP requests are rejected. This can be improved later, especially if chunked replies are implemented. Briefly, before this PR, this is the order or events when a request arrives (RPC `stop`): 1. `bufferevent_disable(..., EV_READ)` 2. `StartShutdown()` 3. `evhttp_del_accept_socket(...)` 4. `ThreadHTTP` terminates (event loop exits) because there are no active or pending events thanks to 1. and 3. 5. client doesn't get the response thanks to 4. This can be verified by applying ```diff // Event loop will exit after current HTTP requests have been handled, so // this reply will get back to the client. StartShutdown(); + MilliSleep(2000); return "Bitcoin server stopping"; } ``` and checking the log output: ``` Received a POST request for / from 127.0.0.1:62443 ThreadRPCServer method=stop user=__cookie__ Interrupting HTTP server ** Exited http event loop Interrupting HTTP RPC server Interrupting RPC tor: Thread interrupt Shutdown: In progress... torcontrol thread exit Stopping HTTP RPC server addcon thread exit opencon thread exit Unregistering HTTP handler for / (exactmatch 1) Unregistering HTTP handler for /wallet/ (exactmatch 0) Stopping RPC RPC stopped. Stopping HTTP server Waiting for HTTP worker threads to exit msghand thread exit net thread exit ... sleep 2 seconds ... Waiting for HTTP event thread to exit Stopped HTTP server ``` For this reason point 3. is moved right after all HTTP workers quit. In that moment HTTP replies are queued in the event loop which keeps spinning util all connections are closed. In order to trigger the server side close with keep alive connections (implicit in HTTP/1.1) the header `Connection: close` is sent if shutdown was requested. This can be tested by ``` bitcoind -regtest nc localhost 18443 POST / HTTP/1.1 Authorization: Basic ... Content-Type: application/json Content-Length: 44 {"jsonrpc": "2.0","method":"stop","id":123} ``` Summing up, this PR: - removes explicit event loop exit — event loop exits once there are no active or pending events - changes the moment the listening sockets are removed — explained above - sends header `Connection: close` on active requests when shutdown was requested which is relevant when it's a persistent connection (default in HTTP 1.1) — libevent is aware of this header and closes the connection gracefully - removes event loop explicit break after 2 seconds timeout Tree-SHA512: 4dac1e86abe388697c1e2dedbf31fb36a394cfafe5e64eadbf6ed01d829542785a8c3b91d1ab680d3f03f912d14fc87176428041141441d25dcb6c98a1e069d8 --- src/httpserver.cpp | 32 +++++++------------ src/rpc/client.cpp | 1 + src/rpc/server.cpp | 8 ++++- test/functional/feature_shutdown.py | 28 ++++++++++++++++ .../test_framework/test_framework.py | 8 ++--- test/functional/test_framework/test_node.py | 4 +-- test/functional/test_runner.py | 1 + 7 files changed, 54 insertions(+), 28 deletions(-) create mode 100755 test/functional/feature_shutdown.py diff --git a/src/httpserver.cpp b/src/httpserver.cpp index 902cf88ca23b..0907f271dd53 100644 --- a/src/httpserver.cpp +++ b/src/httpserver.cpp @@ -10,6 +10,7 @@ #include "utilstrencodings.h" #include "netbase.h" #include "rpc/protocol.h" // For HTTP status codes +#include "shutdown.h" #include "sync.h" #include "ui_interface.h" @@ -20,7 +21,6 @@ #include #include #include -#include #include #include @@ -420,7 +420,6 @@ bool UpdateHTTPServerLogging(bool enable) { } std::thread threadHTTP; -std::future threadResult; static std::vector g_thread_http_workers; bool StartHTTPServer() @@ -428,9 +427,7 @@ bool StartHTTPServer() LogPrint(BCLog::HTTP, "Starting HTTP server\n"); int rpcThreads = std::max((long)gArgs.GetArg("-rpcthreads", DEFAULT_HTTP_THREADS), 1L); LogPrintf("HTTP: starting %d worker threads\n", rpcThreads); - std::packaged_task task(ThreadHTTP); - threadResult = task.get_future(); - threadHTTP = std::thread(std::move(task), eventBase, eventHTTP); + threadHTTP = std::thread(ThreadHTTP, eventBase, eventHTTP); for (int i = 0; i < rpcThreads; i++) { g_thread_http_workers.emplace_back(HTTPWorkQueueRun, workQueue); @@ -442,10 +439,6 @@ void InterruptHTTPServer() { LogPrint(BCLog::HTTP, "Interrupting HTTP server\n"); if (eventHTTP) { - // Unlisten sockets - for (evhttp_bound_socket *socket : boundSockets) { - evhttp_del_accept_socket(eventHTTP, socket); - } // Reject requests on current connections evhttp_set_gencb(eventHTTP, http_reject_request_cb, nullptr); } @@ -465,20 +458,14 @@ void StopHTTPServer() delete workQueue; workQueue = nullptr; } + // Unlisten sockets, these are what make the event loop running, which means + // that after this and all connections are closed the event loop will quit. + for (evhttp_bound_socket *socket : boundSockets) { + evhttp_del_accept_socket(eventHTTP, socket); + } + boundSockets.clear(); if (eventBase) { LogPrint(BCLog::HTTP, "Waiting for HTTP event thread to exit\n"); - // Exit the event loop as soon as there are no active events. - event_base_loopexit(eventBase, nullptr); - // Give event loop a few seconds to exit (to send back last RPC responses), then break it - // Before this was solved with event_base_loopexit, but that didn't work as expected in - // at least libevent 2.0.21 and always introduced a delay. In libevent - // master that appears to be solved, so in the future that solution - // could be used again (if desirable). - // (see discussion in https://github.com/bitcoin/bitcoin/pull/6990) - if (threadResult.valid() && threadResult.wait_for(std::chrono::milliseconds(2000)) == std::future_status::timeout) { - LogPrintf("HTTP event loop did not exit within allotted time, sending loopbreak\n"); - event_base_loopbreak(eventBase); - } threadHTTP.join(); } if (eventHTTP) { @@ -583,6 +570,9 @@ void HTTPRequest::WriteHeader(const std::string& hdr, const std::string& value) void HTTPRequest::WriteReply(int nStatus, const std::string& strReply) { assert(!replySent && req); + if (ShutdownRequested()) { + WriteHeader("Connection", "close"); + } // Send event to main http thread to send reply message struct evbuffer* evb = evhttp_request_get_output_buffer(req); assert(evb); diff --git a/src/rpc/client.cpp b/src/rpc/client.cpp index 94caa6b6b0dc..38d2439943bc 100644 --- a/src/rpc/client.cpp +++ b/src/rpc/client.cpp @@ -182,6 +182,7 @@ static const CRPCConvertParam vRPCConvertParams[] = { "echojson", 7, "arg7" }, { "echojson", 8, "arg8" }, { "echojson", 9, "arg9" }, + { "stop", 0, "wait" }, }; class CRPCConvertTable diff --git a/src/rpc/server.cpp b/src/rpc/server.cpp index 7ca85307a9c9..d72d8d87d54a 100644 --- a/src/rpc/server.cpp +++ b/src/rpc/server.cpp @@ -293,6 +293,9 @@ UniValue help(const JSONRPCRequest& jsonRequest) UniValue stop(const JSONRPCRequest& jsonRequest) { // Accept the deprecated and ignored 'detach' boolean argument + // Also accept the hidden 'wait' integer argument (milliseconds) + // For instance, 'stop 1000' makes the call wait 1 second before returning + // to the client (intended for testing) if (jsonRequest.fHelp || jsonRequest.params.size() > 1) throw std::runtime_error( "stop\n" @@ -300,6 +303,9 @@ UniValue stop(const JSONRPCRequest& jsonRequest) // Event loop will exit after current HTTP requests have been handled, so // this reply will get back to the client. StartShutdown(); + if (jsonRequest.params[0].isNum()) { + MilliSleep(jsonRequest.params[0].get_int()); + } return "Dash Core server stopping"; } @@ -327,7 +333,7 @@ static const CRPCCommand vRPCCommands[] = // --------------------- ------------------------ ----------------------- ------ ---------- /* Overall control/query calls */ { "control", "help", &help, true, {"command"} }, - { "control", "stop", &stop, true, {} }, + { "control", "stop", &stop, true, {"wait"} }, { "control", "uptime", &uptime, true, {} }, }; diff --git a/test/functional/feature_shutdown.py b/test/functional/feature_shutdown.py new file mode 100755 index 000000000000..b633fabb1fac --- /dev/null +++ b/test/functional/feature_shutdown.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# Copyright (c) 2018 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +"""Test bitcoind shutdown.""" + +from test_framework.test_framework import BitcoinTestFramework +from test_framework.util import assert_equal, get_rpc_proxy +from threading import Thread + +def test_long_call(node): + block = node.waitfornewblock() + assert_equal(block['height'], 0) + +class ShutdownTest(BitcoinTestFramework): + + def set_test_params(self): + self.setup_clean_chain = True + self.num_nodes = 1 + + def run_test(self): + node = get_rpc_proxy(self.nodes[0].url, 1, timeout=600, coveragedir=self.nodes[0].coverage_dir) + Thread(target=test_long_call, args=(node,)).start() + # wait 1 second to ensure event loop waits for current connections to close + self.stop_node(0, wait=1000) + +if __name__ == '__main__': + ShutdownTest().main() diff --git a/test/functional/test_framework/test_framework.py b/test/functional/test_framework/test_framework.py index ecd0822b881e..3d592aa0ce04 100755 --- a/test/functional/test_framework/test_framework.py +++ b/test/functional/test_framework/test_framework.py @@ -280,16 +280,16 @@ def start_nodes(self, extra_args=None, stderr=None): for node in self.nodes: coverage.write_all_rpc_commands(self.options.coveragedir, node.rpc) - def stop_node(self, i): + def stop_node(self, i, wait=0): """Stop a dashd test node""" - self.nodes[i].stop_node() + self.nodes[i].stop_node(wait=wait) self.nodes[i].wait_until_stopped() - def stop_nodes(self): + def stop_nodes(self, wait=0): """Stop multiple dashd test nodes""" for node in self.nodes: # Issue RPC to stop nodes - node.stop_node() + node.stop_node(wait=wait) for node in self.nodes: # Wait for nodes to stop diff --git a/test/functional/test_framework/test_node.py b/test/functional/test_framework/test_node.py index bf8cab88dd35..97d55a63f76b 100755 --- a/test/functional/test_framework/test_node.py +++ b/test/functional/test_framework/test_node.py @@ -119,13 +119,13 @@ def get_wallet_rpc(self, wallet_name): wallet_path = "wallet/%s" % wallet_name return self.rpc / wallet_path - def stop_node(self): + def stop_node(self, wait=0): """Stop the node.""" if not self.running: return self.log.debug("Stopping node") try: - self.stop() + self.stop(wait=wait) except http.client.CannotSendRequest: self.log.exception("Unable to stop node.") del self.p2ps[:] diff --git a/test/functional/test_runner.py b/test/functional/test_runner.py index 441d7d1d7dff..4efd057935dc 100755 --- a/test/functional/test_runner.py +++ b/test/functional/test_runner.py @@ -137,6 +137,7 @@ 'resendwallettransactions.py', 'minchainwork.py', 'p2p-acceptblock.py', # NOTE: needs dash_hash to pass + 'feature_shutdown.py', ] EXTENDED_SCRIPTS = [ From 17151daa2cea00176a9fd73316ac023acef0fc11 Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Mon, 14 Oct 2019 13:01:32 +0200 Subject: [PATCH 4/4] Fix compilation --- src/httpserver.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/httpserver.cpp b/src/httpserver.cpp index 0907f271dd53..040d35e3d3a9 100644 --- a/src/httpserver.cpp +++ b/src/httpserver.cpp @@ -4,13 +4,13 @@ #include "httpserver.h" +#include "init.h" #include "chainparamsbase.h" #include "compat.h" #include "util.h" #include "utilstrencodings.h" #include "netbase.h" #include "rpc/protocol.h" // For HTTP status codes -#include "shutdown.h" #include "sync.h" #include "ui_interface.h" @@ -37,6 +37,10 @@ #endif #endif +#include +#include +#include + /** Maximum size of http request (request line + headers) */ static const size_t MAX_HEADERS_SIZE = 8192;