-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[lldb] Updated lldb-server to spawn the child process and share socket #101283
[lldb] Updated lldb-server to spawn the child process and share socket #101283
Conversation
@llvm/pr-subscribers-lldb Author: Dmitry Vasilyev (slydiman) Changes
Added also PipeWindows::WriteWithTimeout(), fixed PipeWindows::ReadWithTimeout() and missing initialization of m_read_overlapped.hEvent in the constructor PipeWindows(lldb::pipe_t read, lldb::pipe_t write). Fixes #90923, fixes #56346. This is the part 1 of the replacement of #100670. In the part 2 I plan to switch Patch is 26.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101283.diff 6 Files Affected:
diff --git a/lldb/include/lldb/Host/windows/PipeWindows.h b/lldb/include/lldb/Host/windows/PipeWindows.h
index 4b5be28d7ae6c..c99cf52be46c1 100644
--- a/lldb/include/lldb/Host/windows/PipeWindows.h
+++ b/lldb/include/lldb/Host/windows/PipeWindows.h
@@ -61,6 +61,9 @@ class PipeWindows : public PipeBase {
Status Delete(llvm::StringRef name) override;
Status Write(const void *buf, size_t size, size_t &bytes_written) override;
+ Status WriteWithTimeout(const void *buf, size_t size,
+ const std::chrono::microseconds &timeout,
+ size_t &bytes_written);
Status ReadWithTimeout(void *buf, size_t size,
const std::chrono::microseconds &timeout,
size_t &bytes_read) override;
diff --git a/lldb/source/Host/windows/PipeWindows.cpp b/lldb/source/Host/windows/PipeWindows.cpp
index c82c919607b5b..318d5abbed74d 100644
--- a/lldb/source/Host/windows/PipeWindows.cpp
+++ b/lldb/source/Host/windows/PipeWindows.cpp
@@ -58,7 +58,10 @@ PipeWindows::PipeWindows(pipe_t read, pipe_t write)
}
ZeroMemory(&m_read_overlapped, sizeof(m_read_overlapped));
+ m_read_overlapped.hEvent = ::CreateEventA(nullptr, TRUE, FALSE, nullptr);
+
ZeroMemory(&m_write_overlapped, sizeof(m_write_overlapped));
+ m_write_overlapped.hEvent = ::CreateEventA(nullptr, TRUE, FALSE, nullptr);
}
PipeWindows::~PipeWindows() { Close(); }
@@ -77,6 +80,7 @@ Status PipeWindows::CreateNew(bool child_process_inherit) {
m_write_fd = _open_osfhandle((intptr_t)m_write, _O_WRONLY);
ZeroMemory(&m_write_overlapped, sizeof(m_write_overlapped));
+ m_write_overlapped.hEvent = ::CreateEventA(nullptr, TRUE, FALSE, nullptr);
return Status();
}
@@ -202,6 +206,7 @@ Status PipeWindows::OpenNamedPipe(llvm::StringRef name,
m_write_fd = _open_osfhandle((intptr_t)m_write, _O_WRONLY);
ZeroMemory(&m_write_overlapped, sizeof(m_write_overlapped));
+ m_write_overlapped.hEvent = ::CreateEventA(nullptr, TRUE, FALSE, nullptr);
}
return Status();
@@ -228,6 +233,8 @@ int PipeWindows::ReleaseWriteFileDescriptor() {
return PipeWindows::kInvalidDescriptor;
int result = m_write_fd;
m_write_fd = PipeWindows::kInvalidDescriptor;
+ if (m_write_overlapped.hEvent)
+ ::CloseHandle(m_write_overlapped.hEvent);
m_write = INVALID_HANDLE_VALUE;
ZeroMemory(&m_write_overlapped, sizeof(m_write_overlapped));
return result;
@@ -250,6 +257,9 @@ void PipeWindows::CloseWriteFileDescriptor() {
if (!CanWrite())
return;
+ if (m_write_overlapped.hEvent)
+ ::CloseHandle(m_write_overlapped.hEvent);
+
_close(m_write_fd);
m_write = INVALID_HANDLE_VALUE;
m_write_fd = PipeWindows::kInvalidDescriptor;
@@ -283,54 +293,94 @@ Status PipeWindows::ReadWithTimeout(void *buf, size_t size,
DWORD sys_bytes_read = size;
BOOL result = ::ReadFile(m_read, buf, sys_bytes_read, &sys_bytes_read,
&m_read_overlapped);
- if (!result && GetLastError() != ERROR_IO_PENDING)
- return Status(::GetLastError(), eErrorTypeWin32);
-
- DWORD timeout = (duration == std::chrono::microseconds::zero())
- ? INFINITE
- : duration.count() * 1000;
- DWORD wait_result = ::WaitForSingleObject(m_read_overlapped.hEvent, timeout);
- if (wait_result != WAIT_OBJECT_0) {
- // The operation probably failed. However, if it timed out, we need to
- // cancel the I/O. Between the time we returned from WaitForSingleObject
- // and the time we call CancelIoEx, the operation may complete. If that
- // hapens, CancelIoEx will fail and return ERROR_NOT_FOUND. If that
- // happens, the original operation should be considered to have been
- // successful.
- bool failed = true;
- DWORD failure_error = ::GetLastError();
- if (wait_result == WAIT_TIMEOUT) {
- BOOL cancel_result = CancelIoEx(m_read, &m_read_overlapped);
- if (!cancel_result && GetLastError() == ERROR_NOT_FOUND)
- failed = false;
+ if (!result) {
+ if (GetLastError() != ERROR_IO_PENDING)
+ return Status(::GetLastError(), eErrorTypeWin32);
+ else {
+ DWORD timeout = (duration == std::chrono::microseconds::zero())
+ ? INFINITE
+ : duration.count() * 1000;
+ DWORD wait_result =
+ ::WaitForSingleObject(m_read_overlapped.hEvent, timeout);
+ if (wait_result != WAIT_OBJECT_0) {
+ // The operation probably failed. However, if it timed out, we need to
+ // cancel the I/O. Between the time we returned from WaitForSingleObject
+ // and the time we call CancelIoEx, the operation may complete. If that
+ // hapens, CancelIoEx will fail and return ERROR_NOT_FOUND. If that
+ // happens, the original operation should be considered to have been
+ // successful.
+ bool failed = true;
+ DWORD failure_error = ::GetLastError();
+ if (wait_result == WAIT_TIMEOUT) {
+ BOOL cancel_result = CancelIoEx(m_read, &m_read_overlapped);
+ if (!cancel_result && GetLastError() == ERROR_NOT_FOUND)
+ failed = false;
+ }
+ if (failed)
+ return Status(failure_error, eErrorTypeWin32);
+ }
+
+ // Now we call GetOverlappedResult setting bWait to false, since we've
+ // already waited as long as we're willing to.
+ if (!GetOverlappedResult(m_read, &m_read_overlapped, &sys_bytes_read,
+ FALSE))
+ return Status(::GetLastError(), eErrorTypeWin32);
}
- if (failed)
- return Status(failure_error, eErrorTypeWin32);
}
-
- // Now we call GetOverlappedResult setting bWait to false, since we've
- // already waited as long as we're willing to.
- if (!GetOverlappedResult(m_read, &m_read_overlapped, &sys_bytes_read, FALSE))
- return Status(::GetLastError(), eErrorTypeWin32);
-
bytes_read = sys_bytes_read;
return Status();
}
-Status PipeWindows::Write(const void *buf, size_t num_bytes,
- size_t &bytes_written) {
+Status PipeWindows::WriteWithTimeout(const void *buf, size_t size,
+ const std::chrono::microseconds &duration,
+ size_t &bytes_written) {
if (!CanWrite())
return Status(ERROR_INVALID_HANDLE, eErrorTypeWin32);
- DWORD sys_bytes_written = 0;
- BOOL write_result = ::WriteFile(m_write, buf, num_bytes, &sys_bytes_written,
- &m_write_overlapped);
- if (!write_result && GetLastError() != ERROR_IO_PENDING)
- return Status(::GetLastError(), eErrorTypeWin32);
+ bytes_written = 0;
+ DWORD sys_bytes_write = size;
+ BOOL result = ::WriteFile(m_write, buf, sys_bytes_write, &sys_bytes_write,
+ &m_write_overlapped);
+ if (!result) {
+ if (GetLastError() != ERROR_IO_PENDING)
+ return Status(::GetLastError(), eErrorTypeWin32);
+ else {
+ DWORD timeout = (duration == std::chrono::microseconds::zero())
+ ? INFINITE
+ : duration.count() * 1000;
+ DWORD wait_result =
+ ::WaitForSingleObject(m_write_overlapped.hEvent, timeout);
+ if (wait_result != WAIT_OBJECT_0) {
+ // The operation probably failed. However, if it timed out, we need to
+ // cancel the I/O. Between the time we returned from WaitForSingleObject
+ // and the time we call CancelIoEx, the operation may complete. If that
+ // hapens, CancelIoEx will fail and return ERROR_NOT_FOUND. If that
+ // happens, the original operation should be considered to have been
+ // successful.
+ bool failed = true;
+ DWORD failure_error = ::GetLastError();
+ if (wait_result == WAIT_TIMEOUT) {
+ BOOL cancel_result = CancelIoEx(m_write, &m_write_overlapped);
+ if (!cancel_result && GetLastError() == ERROR_NOT_FOUND)
+ failed = false;
+ }
+ if (failed)
+ return Status(failure_error, eErrorTypeWin32);
+ }
+
+ // Now we call GetOverlappedResult setting bWait to false, since we've
+ // already waited as long as we're willing to.
+ if (!GetOverlappedResult(m_write, &m_write_overlapped, &sys_bytes_write,
+ FALSE))
+ return Status(::GetLastError(), eErrorTypeWin32);
+ }
+ }
- BOOL result = GetOverlappedResult(m_write, &m_write_overlapped,
- &sys_bytes_written, TRUE);
- if (!result)
- return Status(::GetLastError(), eErrorTypeWin32);
+ bytes_written = sys_bytes_write;
return Status();
}
+
+Status PipeWindows::Write(const void *buf, size_t size, size_t &bytes_written) {
+ return WriteWithTimeout(buf, size, std::chrono::microseconds::zero(),
+ bytes_written);
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
index 65f1cc12ba307..18824a6cb08ca 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp
@@ -159,6 +159,40 @@ GDBRemoteCommunicationServerPlatform::GDBRemoteCommunicationServerPlatform(
GDBRemoteCommunicationServerPlatform::~GDBRemoteCommunicationServerPlatform() =
default;
+void GDBRemoteCommunicationServerPlatform::Proc(
+ const lldb_private::Args &args) {
+ if (!IsConnected())
+ return;
+
+ if (args.GetArgumentCount() > 0) {
+ lldb::pid_t pid = LLDB_INVALID_PROCESS_ID;
+ std::optional<uint16_t> port;
+ std::string socket_name;
+ Status error = LaunchGDBServer(args,
+ "", // hostname
+ pid, port, socket_name);
+ if (error.Success())
+ SetPendingGdbServer(pid, *port, socket_name);
+ }
+
+ bool interrupt = false;
+ bool done = false;
+ Status error;
+ while (!interrupt && !done) {
+ if (GetPacketAndSendResponse(std::nullopt, error, interrupt, done) !=
+ GDBRemoteCommunication::PacketResult::Success)
+ break;
+ }
+
+ if (error.Fail()) {
+ Log *log = GetLog(LLDBLog::Platform);
+ LLDB_LOGF(log,
+ "GDBRemoteCommunicationServerPlatform::%s() "
+ "GetPacketAndSendResponse: %s",
+ __FUNCTION__, error.AsCString());
+ }
+}
+
Status GDBRemoteCommunicationServerPlatform::LaunchGDBServer(
const lldb_private::Args &args, std::string hostname, lldb::pid_t &pid,
std::optional<uint16_t> &port, std::string &socket_name) {
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h
index 1853025466cf0..2f3f313c917c4 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h
@@ -85,7 +85,7 @@ class GDBRemoteCommunicationServerPlatform
void SetPortOffset(uint16_t port_offset);
- void SetInferiorArguments(const lldb_private::Args &args);
+ void Proc(const lldb_private::Args &args);
// Set port if you want to use a specific port number.
// Otherwise port will be set to the port that was chosen for you.
diff --git a/lldb/tools/lldb-server/LLDBServerUtilities.cpp b/lldb/tools/lldb-server/LLDBServerUtilities.cpp
index c3a8df19e969e..5facfbf3105e9 100644
--- a/lldb/tools/lldb-server/LLDBServerUtilities.cpp
+++ b/lldb/tools/lldb-server/LLDBServerUtilities.cpp
@@ -27,11 +27,13 @@ class TestLogHandler : public LogHandler {
: m_stream_sp(stream_sp) {}
void Emit(llvm::StringRef message) override {
+ std::lock_guard<std::mutex> guard(m_mutex);
(*m_stream_sp) << message;
m_stream_sp->flush();
}
private:
+ std::mutex m_mutex;
std::shared_ptr<raw_ostream> m_stream_sp;
};
diff --git a/lldb/tools/lldb-server/lldb-platform.cpp b/lldb/tools/lldb-server/lldb-platform.cpp
index 7148a1d2a3094..f14b90b1c28fc 100644
--- a/lldb/tools/lldb-server/lldb-platform.cpp
+++ b/lldb/tools/lldb-server/lldb-platform.cpp
@@ -22,6 +22,7 @@
#include <optional>
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,8 +33,10 @@
#include "lldb/Host/ConnectionFileDescriptor.h"
#include "lldb/Host/HostGetOpt.h"
#include "lldb/Host/OptionParser.h"
+#include "lldb/Host/Socket.h"
#include "lldb/Host/common/TCPSocket.h"
#include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/LLDBLog.h"
#include "lldb/Utility/Status.h"
using namespace lldb;
@@ -60,6 +63,9 @@ static struct option g_long_options[] = {
{"max-gdbserver-port", required_argument, nullptr, 'M'},
{"socket-file", required_argument, nullptr, 'f'},
{"server", no_argument, &g_server, 1},
+#if defined(_WIN32)
+ {"accept", required_argument, nullptr, 'a'},
+#endif
{nullptr, 0, nullptr, 0}};
#if defined(__APPLE__)
@@ -114,6 +120,218 @@ static Status save_socket_id_to_file(const std::string &socket_id,
return status;
}
+static GDBRemoteCommunicationServerPlatform::PortMap gdbserver_portmap;
+static std::mutex gdbserver_portmap_mutex;
+
+#if defined(_WIN32)
+static void SpawnProcessReaped(lldb::pid_t pid, int signal, int status) {
+ std::lock_guard<std::mutex> guard(gdbserver_portmap_mutex);
+ gdbserver_portmap.FreePortForProcess(pid);
+}
+
+static bool SpawnProcessParent(const char *progname, Connection *conn,
+ uint16_t gdb_port, uint16_t port_offset,
+ const lldb_private::Args &args,
+ const std::string &log_file,
+ const StringRef log_channels) {
+ Log *log = GetLog(LLDBLog::Platform);
+ Pipe socket_pipe;
+ Status error = socket_pipe.CreateNew(true);
+ if (error.Fail()) {
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "cannot create pipe: %s",
+ error.AsCString());
+ return false;
+ }
+
+ ProcessLaunchInfo launch_info;
+ FileSpec self_spec(progname, FileSpec::Style::native);
+ launch_info.SetExecutableFile(self_spec, true);
+ Args &self_args = launch_info.GetArguments();
+ self_args.AppendArgument(llvm::StringRef("platform"));
+ self_args.AppendArgument(llvm::StringRef("--accept"));
+ self_args.AppendArgument(llvm::to_string(socket_pipe.GetReadPipe()));
+ if (gdb_port) {
+ self_args.AppendArgument(llvm::StringRef("--gdbserver-port"));
+ self_args.AppendArgument(llvm::to_string(gdb_port));
+ }
+ if (port_offset > 0) {
+ self_args.AppendArgument(llvm::StringRef("--port-offset"));
+ self_args.AppendArgument(llvm::to_string(port_offset));
+ }
+ if (!log_file.empty()) {
+ self_args.AppendArgument(llvm::StringRef("--log-file"));
+ self_args.AppendArgument(log_file);
+ }
+ if (!log_channels.empty()) {
+ self_args.AppendArgument(llvm::StringRef("--log-channels"));
+ self_args.AppendArgument(log_channels);
+ }
+ if (args.GetArgumentCount() > 0) {
+ self_args.AppendArgument("--");
+ self_args.AppendArguments(args);
+ }
+
+ launch_info.SetLaunchInSeparateProcessGroup(false);
+ launch_info.SetMonitorProcessCallback(&SpawnProcessReaped);
+
+ // Copy the current environment.
+ // WSASocket(FROM_PROTOCOL_INFO) will fail in the child process
+ // with the error WSAEPROVIDERFAILEDINIT if the SystemRoot is missing
+ // in the environment.
+ launch_info.GetEnvironment() = Host::GetEnvironment();
+
+ launch_info.GetFlags().Set(eLaunchFlagDisableSTDIO);
+
+ launch_info.AppendCloseFileAction(socket_pipe.GetWriteFileDescriptor());
+
+ // Close STDIN, STDOUT and STDERR.
+ launch_info.AppendCloseFileAction(STDIN_FILENO);
+ launch_info.AppendCloseFileAction(STDOUT_FILENO);
+ launch_info.AppendCloseFileAction(STDERR_FILENO);
+
+ // Redirect STDIN, STDOUT and STDERR to "/dev/null".
+ launch_info.AppendSuppressFileAction(STDIN_FILENO, true, false);
+ launch_info.AppendSuppressFileAction(STDOUT_FILENO, false, true);
+ launch_info.AppendSuppressFileAction(STDERR_FILENO, false, true);
+
+ std::string cmd;
+ self_args.GetCommandString(cmd);
+
+ error = Host::LaunchProcess(launch_info);
+ if (error.Fail()) {
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "cannot launch child process for connection: %s",
+ error.AsCString());
+ return false;
+ }
+
+ lldb::pid_t childPid = launch_info.GetProcessID();
+ if (childPid == LLDB_INVALID_PROCESS_ID) {
+ LLDB_LOGF(log, "lldb-platform parent: "
+ "cannot launch child process for connection: invalid pid");
+ return false;
+ }
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "launched '%s', pid=0x%x",
+ cmd.c_str(), childPid);
+
+ {
+ std::lock_guard<std::mutex> guard(gdbserver_portmap_mutex);
+ gdbserver_portmap.AssociatePortWithProcess(gdb_port, childPid);
+ }
+
+ if (socket_pipe.CanRead())
+ socket_pipe.CloseReadFileDescriptor();
+ if (!socket_pipe.CanWrite()) {
+ LLDB_LOGF(log, "lldb-platform parent: "
+ "cannot write to socket_pipe");
+ Host::Kill(childPid, SIGTERM);
+ return false;
+ }
+
+ const TCPSocket &socket =
+ static_cast<const TCPSocket &>(*conn->GetReadObject());
+ NativeSocket nativeSocket = socket.GetNativeSocket();
+
+ WSAPROTOCOL_INFO protocol_info;
+ if (::WSADuplicateSocket(nativeSocket, childPid, &protocol_info) ==
+ SOCKET_ERROR) {
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "WSADuplicateSocket() failed, error: %d",
+ ::WSAGetLastError());
+ Host::Kill(childPid, SIGTERM);
+ return false;
+ }
+
+ size_t num_bytes;
+ error = socket_pipe.WriteWithTimeout(&protocol_info, sizeof(protocol_info),
+ std::chrono::seconds(2), num_bytes);
+ if (error.Fail()) {
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "socket_pipe.WriteWithTimeout(WSAPROTOCOL_INFO) failed: %s",
+ error.AsCString());
+ Host::Kill(childPid, SIGTERM);
+ return false;
+ }
+ if (num_bytes != sizeof(protocol_info)) {
+ LLDB_LOGF(log,
+ "lldb-platform parent: "
+ "socket_pipe.WriteWithTimeout(WSAPROTOCOL_INFO) failed: %d bytes",
+ num_bytes);
+ Host::Kill(childPid, SIGTERM);
+ return false;
+ }
+
+ socket_pipe.Close();
+
+ return true;
+}
+
+static bool SpawnProcessChild(pipe_t accept_fd,
+ const std::string &listen_host_port,
+ uint16_t port_offset,
+ const lldb_private::Args &args) {
+ if (accept_fd == INVALID_HANDLE_VALUE)
+ return false;
+
+ Log *log = GetLog(LLDBLog::Platform);
+ if (!listen_host_port.empty()) {
+ LLDB_LOGF(log, "lldb-platform child: "
+ "ambiguous parameters --listen and --accept");
+ exit(SOCKET_ERROR);
+ }
+
+ Pipe socket_pipe(accept_fd, LLDB_INVALID_PIPE);
+
+ WSAPROTOCOL_INFO protocol_info;
+ size_t num_bytes;
+ Status error =
+ socket_pipe.ReadWithTimeout(&protocol_info, sizeof(protocol_info),
+ std::chrono::seconds(2), num_bytes);
+ if (error.Fail()) {
+ LLDB_LOGF(log,
+ "lldb-platform child: "
+ "socket_pipe(0x%x).ReadWithTimeout(WSAPROTOCOL_INFO) failed: %s",
+ accept_fd, error.AsCString());
+ exit(SOCKET_ERROR);
+ }
+ if (num_bytes != sizeof(protocol_info)) {
+ LLDB_LOGF(log,
+ "lldb-platform child: "
+ "socket_pipe(0x%x).ReadWithTimeout(WSAPROTOCOL_INFO) failed: "
+ "%d bytes",
+ num_bytes);
+ exit(SOCKET_ERROR);
+ }
+
+ NativeSocket socket = ::WSASocket(FROM_PROTOCOL_INFO, FROM_PROTOCOL_INFO,
+ FROM_PROTOCOL_INFO, &protocol_info, 0, 0);
+ if (socket == INVALID_SOCKET) {
+ LLDB_LOGF(log,
+ "lldb-platform child: "
+ "WSASocket(FROM_PROTOCOL_INFO) failed: error %d",
+ ::WSAGetLastError());
+ exit(SOCKET_ERROR);
+ }
+
+ Connection *conn =
+ new ConnectionFileDescriptor(new TCPSocket(socket, true, false));
+ GDBRemoteCommunicationServ...
[truncated]
|
28ee2d2
to
396b9a0
Compare
I have added the issue #101475. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What, if any, are the changes to how you would run lldb-server on Windows? I see a new argument here but not sure if that's only meant for lldb-server to pass to itself when it starts a child process.
if (!result) { | ||
if (GetLastError() != ERROR_IO_PENDING) | ||
return Status(::GetLastError(), eErrorTypeWin32); | ||
else { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't need the else given that the line above is a return.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Updated.
BOOL cancel_result = CancelIoEx(m_read, &m_read_overlapped); | ||
if (!cancel_result && GetLastError() == ERROR_NOT_FOUND) | ||
failed = false; | ||
if (!result) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could do an early return here if (result) {
. You'll have 2 copies of:
bytes_read = sys_bytes_read;
return Status();
But it could be easier to read.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Updated.
"", // hostname | ||
pid, port, socket_name); | ||
if (error.Success()) | ||
SetPendingGdbServer(pid, *port, socket_name); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens with the implicit else
part of this, does GetPacketAndSendResponse
somehow fail immediately?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code is just moved from lldb-platform.cpp
GetPacketAndSendResponse() ignores error
value. GetPacketAndSendResponse() must set the flag quit
or interrupt
and update error
.
LLDB_LOGF(log, | ||
"GDBRemoteCommunicationServerPlatform::%s() " | ||
"GetPacketAndSendResponse: %s", | ||
__FUNCTION__, error.AsCString()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I would've thought that __FUNCTION__
gave you the function name here but maybe not. Can you just try making it if (true)
and confirm what it prints?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Look at other LLDB_LOGF() usage in GDBRemoteCommunicationServerPlatform.cpp I just used the similar code and format. This will print GDBRemoteCommunicationServerPlatform::Proc() GetPacketAndSendResponse: lost connection
to the log on Linux and GDBRemoteCommunicationServerPlatform::lldb_private::process_gdb_remote::GDBRemoteCommunicationServerPlatform::Proc() GetPacketAndSendResponse: lost connection
on Windows.
@@ -159,6 +159,40 @@ GDBRemoteCommunicationServerPlatform::GDBRemoteCommunicationServerPlatform( | |||
GDBRemoteCommunicationServerPlatform::~GDBRemoteCommunicationServerPlatform() = | |||
default; | |||
|
|||
void GDBRemoteCommunicationServerPlatform::Proc( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we're changing the name could we have a better name than Proc
, or if Proc
is just a term I am unfamiliar with, a comment in the header to explain what this function does?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note void SetInferiorArguments(const lldb_private::Args &args);
is a missing function. It is a bug. I have just removed it and added Proc(). There is no any connection between these functions. I have just moved a common code from lldb-platform to GDBRemoteCommunicationServerPlatform::Proc().
Finally I have moved this code back to lldb-platform.cpp to client_handle(). So GDBRemoteCommunicationServerPlatform.* are untouched now.
@@ -27,11 +27,13 @@ class TestLogHandler : public LogHandler { | |||
: m_stream_sp(stream_sp) {} | |||
|
|||
void Emit(llvm::StringRef message) override { | |||
std::lock_guard<std::mutex> guard(m_mutex); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A mutex is now required because of the sharing of the socket I assume?
Could you add a comment on the member variable to say that as well.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is not related to the socket sharing. It is the fix of a crash. I have moved it to #101326.
Note also this comment.
if (num_bytes != sizeof(protocol_info)) { | ||
LLDB_LOGF(log, | ||
"lldb-platform child: " | ||
"socket_pipe(0x%x).ReadWithTimeout(WSAPROTOCOL_INFO) failed: " |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is missing accept_fd for the first %x I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Updated.
What exactly does
Maybe you just have a typo or |
static GDBRemoteCommunicationServerPlatform::PortMap gdbserver_portmap; | ||
static std::mutex gdbserver_portmap_mutex; | ||
|
||
#if defined(_WIN32) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you think we will always have these large __WIN32 blocks, or if we go forward with your plan, will some of that become generic code once again?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can move this code to some windows specific directory. But it is windows specific anyway. Note it contains a lot of lldb-server platform
specific parameters. We can just pass the socket FD via the command line on non-Windows OS. lldb-server gdbserver
already supports it with the parameter --fd
. But it does not work on Windows. acceprt_fd here is a pipe FD which is used to transfer the structure WSAPROTOCOL_INFO. We need to know the child pid to initialize WSAPROTOCOL_INFO. Finally the child process is able to initialize the accepted socket FROM_PROTOCOL_INFO.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should slow down a bit.
For consistency, and to minimize the number of ifdefs, I believe windows and non-windows paths should use as similar approaches as possible. That means I think the end state should be that the posix path also uses a fork+exec approach.
For me, it also seems natural that this should be the first step. While doing that, we can make sure to create abstractions that can be used to implement the windows behavior as well. I'm imagining some sort of a function or a class, which takes the target binary, its arguments, and a socket, and launches that binary with that socket. (we'll probably also need a similar function/class on the other end to receive that socket).
The function doesn't have to be terribly generic, but it ideally should encompass all of platform specific behavior, so that the surrounding code does not need to use ifdefs. (But, ideally, it should at least be slightly generic, so that one can write a unit test for this functionality).
(BTW, I'm OOO for the rest of the week, so I won't be able to review this in detail. Upon cursory inspection, the general idea makes sense, but we really need to figure out how to encapsulate this. I'm not sure if we really need the WriteWithTimeout functionality. The reason we don't have it so far is that pipes have a buffer, and unless we're writing a huge chunk of data, the write will never block anyway. There's nothing wrong with it in principle though, and if you do want to have it, it'd be best if it comes with in its own patch and with a test.)
Sorry. I have updated the description with |
396b9a0
to
526708c
Compare
The explanation is here. This new argument is the fork() workaround for Windows. We can keep this new argument for non-Windows OS, but it is redundant because fork() is enough. |
fork+exec on non-Windows OS is redundant. It simply does not make sence. We can use multithreading or CreateProcess on Windows. It seems the multithreading solution faced a lot of issues and system's bugs on Linux. So CreateProcess is the only way. But it is the fork() workaround for Windows.
Look at GDBRemoteCommunication::StartDebugserverProcess(). I planed to extend it to use
Before this patch the pipe read/write 1 byte in ConnectionFileDescriptor::CommandPipe. The maximal size was 6 bytes (the port value) in GDBRemoteCommunication::StartDebugserverProcess() and it is only the place where ReadWithTimeout() is used. But WSAPROTOCOL_INFO is big enough (628 bytes). Usually the pipe buffer is 4K. But we need an ability to avoid hangs anyway. |
c785238
to
add315c
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
This is where I'll have to disagree with you. The exec following the fork is not "redundant" on linux. A naked fork is an extremely dangerous thing in todays (multithreaded) applications. I'm absolutely certain that the improper use of fork with threads (and the FD leaks that go along with it) is the cause of at least one of the issues (ETXTBSY) you refer to as "system bugs", and fairly sure it's also the cause of the other one (the pipe workaround) as well. In a multithreaded application (which lldb-platform isn't right now, but more of an accident than design, and which you've tried to make extremely multithreaded) the only safe use of fork is if it is immediately followed by an exec, and this has to be done in an extremely careful way (we try to do that, and we still failed). But even if it weren't for all of this, I'd still think it makes sense to start a new process, just for symmetry with windows. That's the nature of portable programming.
I didn't say it was going to be easy. :) I can look at this closer when I get back next week, but I definitely think we can and should come up with a solution involving less ifdefs.
628 is still less that 4k. Can the buffer be smaller than that?
We have one and a half pipe tests (one of them claims to be broken on windows). I wrote them when fixing some Pipe bugs. We should definitely have more. I wholeheartedly disagree with the assertion that the test is useless unless it uses multiple threads and high load. Even the most simple test serves as documents the correct/expected API usage and proves that the functionality works at least in the simple cases. For something like
That will cover most of the codepaths and obvious ways it can go wrong (e.g. during refactoring). Then, if you wanted to be fancy, you could do things like start a write with a long timeout and then try to read from the pipe, and see that causes the write to complete, etc.; but I wouldn't expect everyone to write those all the time. |
…t on Windows `lldb-server platform --server` works on Windows now w/o multithreading. The rest functionality remains unchanged. Depends on llvm#101383. Fixes llvm#90923, fixes llvm#56346. This is the part 1 of the replacement of llvm#100670. In the part 2 I plan to switch `lldb-server gdbserver` to use `--fd` and listen a common gdb port for all gdbserver connections. Then we can remove gdb port mapping to fix llvm#97537.
add315c
to
6b2a41b
Compare
I meant https://bugzilla.kernel.org/show_bug.cgi?id=546 and this discussion. I'm sad that you don't believe. I concluded that no one uses the real multithreading on Linux. Someone tried and faced unsolvable problems. Therefore, #100670 is doomed.
Not all code may be portable. It is better to use threads on Windows, but not on Posix. fork() is missing on Windows and starting a new process is a workaround. When we have the accepted socket and everything is ready to just handle the connection on Posix (after fork), you suggest to drop all initialized parameters, start a new process, pass all necessary parameters via the command line, initialize all parameters again, go to the same point and finally handle the connection. What is the reason? To be similar how it will work on Windows. No, I don't agree. You are worried about some FD. But the main process |
I have moved all Pipe related changes to #101383 and added the test. |
I know what you're referring to. However, that's a statement that comes with a very heavy burden of proof, one that you simply have not provided. The number of users of linux kernel exceeds the users of lldb by many orders of magnitude, so it's very unlikely that the problem is on their side. It's not that it's impossible -- I personally have tracked down lldb problems down to bugs in our dependencies -- from the libstdc++ to the kernel. However, this just doesn't look like one of those cases.
That's another very bold statement. I am not sure what you meant by "real multithreading", but I know for a fact that some Linux applications run thousands of threads in one process without any issues. All of them are very careful about how they use fork though.
That is exactly what I'm suggesting.
Well, at least we can agree to disagree. ;)
It forks. Forking duplicates any FD opened into the parent into the child. Even those marked with CLOEXEC, which is the usual way to guard against FD leakage. If you're single-threaded than this is manageable, but as soon as you have multiple threads (believe it or not, many applications on linux use multiple threads, and a lot of code creates background threads; lldb-server platform code executed here is pretty small, but it still calls into a lot of code that could create those), fork() becomes a ticking time bomb. In a multithreaded application, the only code that can be safely run after a fork, is that which can run in a signal handler. You can't write a server like that. Using fork to serve multiple connections is a pattern from the seventies. Modern applications don't do it this way. As far as I'm concerned, this code should not have been written this way (and if the author cared about windows, they would have never done it), and I'd support removing this pattern even if it weren't for windows.
That's nice, but I think we should figure out how to reduce the number of ifdefs in this patch. Porting linux away from fork may not be your concern, but figuring out how to make the code less branchy is. If you can do that without removing forks, I can take it upon myself to handle the rest. I just think it would be easier to do it both together... |
We can unify spawn_process_child() moving this code to ConnectionFileDescriptor::ConnectFD() in ConnectionFileDescriptorPosix.cpp with Note I will remove We can try to move spawn_process_parent() to Probably it is necessary to define a new type and an invalid value for accept_fd. It is int (socket fd) on Posix and pipe_t (HANDLE) on Windows. I would be happy to keep it as is till part 2 of this patch to minimize a redundant work. |
I have updated the patch to spawn the child process on non-Windows too and minimized the number of |
`lldb-server platform --server` works on Windows now w/o multithreading. The rest functionality remains unchanged. Fixes llvm#90923, fixes llvm#56346. This is the part 1 of the replacement of llvm#100670. In the part 2 I plan to switch `lldb-server gdbserver` to use `--fd` and listen a common gdb port for all gdbserver connections. Then we can remove gdb port mapping to fix llvm#97537.
f382414
to
714253b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@labath I have added the class SharedSocket. I have implemented all the recommendations.
Looks mostly fine. Just a couple of notes. David, what do you think?
Note we have the buildbot for cross tests (Windows x64/Linux x64 host and Linux Aarch64 target).
So everything is tested. Hope 1191 API tests run with this lldb-server are enough.
That's great, but I don't think this should be the only way this code is tested, as most of it can be tested perfectly well locally. Most of the devs will not be running this bot configuration before they push their patches, and they will not know how to set it up when they get the breakage email, so it's always better to have a first line of defense that everyone can use, and leave the cross testing for catching subtle integration details.
That said, we do have that one test which should exercise most of this code, so I'm sort of fine with this for now, but I still may ask for more tests in the future, for example, when we librarize this code.
} | ||
|
||
// Used by the child process. | ||
SharedSocket(fd_t fd) : m_fd(fd) {} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SharedSocket(fd_t fd) : m_fd(fd) {} | |
explicit SharedSocket(fd_t fd) : m_fd(fd) {} |
return Status(); | ||
} | ||
|
||
Status GetNativeSocket(NativeSocket &socket) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this would be better off as a static function -- it's not using any of the members except m_fd, which could just as easily be a function argument, and it's duplicating the Pipe and NativeSocket members as local vars.
I can also imagine with a proper member function version of it, but then I think should reuse the members in the class. For example, by putting most of this work into the constructor, where it will initialize the m_pipe member and use it to receive the NativeSocket. GetNativeSocket
could then be a plain getter (?)
// Copy the current environment. | ||
// WSASocket(FROM_PROTOCOL_INFO) will fail in the child process | ||
// with the error WSAEPROVIDERFAILEDINIT if the SystemRoot is missing | ||
// in the environment. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW, I don't think this requires any special justification. Environment inheritance is the default behavior when launching processes. I'd just delete this comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good. Thanks for your patience.
Listen to gdbserver-port, accept the connection and run `lldb-server gdbserver --fd` on all platforms. Added acceptor_gdb and gdb_thread to lldb-platform.cpp SharedSocket has been moved to ConnectionFileDescriptorPosix. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537.
Listen to gdbserver-port, accept the connection and run `lldb-server gdbserver --fd` on all platforms. Added acceptor_gdb and gdb_thread to lldb-platform.cpp SharedSocket has been moved to ConnectionFileDescriptorPosix. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run `lldb-server gdbserver --fd` on all platforms. Added acceptor_gdb and gdb_thread to lldb-platform.cpp SharedSocket has been moved to ConnectionFileDescriptorPosix. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Refactored Acceptor to listen on 2 ports in the main thread. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Refactored Acceptor to listen on 2 ports in the main thread. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Refactored Acceptor to listen on 2 ports in the main thread. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Depends on llvm#106955. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
Listen to gdbserver-port, accept the connection and run lldb-server gdbserver --fd on all platforms. Parameters --min-gdbserver-port and --max-gdbserver-port are deprecated now. This is the part 2 of llvm#101283. Fixes llvm#97537, fixes llvm#101475.
llvm#101283) `lldb-server platform --server` works on Windows now w/o multithreading. The rest functionality remains unchanged. Fixes llvm#90923, fixes llvm#56346. This is the part 1 of the replacement of llvm#100670. In the part 2 I plan to switch `lldb-server gdbserver` to use `--fd` and listen a common gdb port for all gdbserver connections. Then we can remove gdb port mapping to fiх llvm#97537. (cherry picked from commit 82ee31f)
lldb-server platform --server
works on Windows now w/o multithreading. The rest functionality remains unchanged.Fixes #90923, fixes #56346.
This is the part 1 of the replacement of #100670.
In the part 2 I plan to switch
lldb-server gdbserver
to use--fd
and listen a common gdb port for all gdbserver connections. Then we can remove gdb port mapping to fiх #97537.