From 2a8085682edadf63a87431ae6315e5d2dbbbc06f Mon Sep 17 00:00:00 2001 From: David Chisnall Date: Tue, 12 May 2020 12:10:49 +0000 Subject: [PATCH] Add support for clone to LKL. LKL supports two kinds of thread: - Kernel threads, which have a host thread associated with them but which are run by the kernel scheduler. These threads are triggered to run by prodding a semaphore in `__switch_to`. - Host threads, which exist outside of the LKL world and have a Linux task structure assigned to them on their first system call. Threads created with clone are somewhere between these two. They are host threads, but they are created from within the Linux kernel and so must have their task structure assigned early. They are distinct from normal host threads in several ways. Their stack pointer, TLS area, and initial program counter value are defined by the relevant system call and their lifecycle is managed by the kernel, so we need an additional mechanism in the host interface for creating them. Cloned threads are created with a new host op that takes the initial program counter (the return address of the clone system call), stack pointer, and TLS area, along with the task structure and the TLS key used for storing the task. The host is responsible for setting up a thread with these characteristics. Cloned thread destruction also requires a new host op. Existing host threads terminate by exiting normally and their associated task structure is destroyed by the TLS destructor. In contrast, cloned threads exit by performing an exit system call, which never returns. The new host op explicitly destroys the thread. There is no guarantee that the stack exists after the thread is destroyed, so the host environment must support a remote destroy operation. 
--- arch/lkl/Kconfig | 1 + arch/lkl/include/asm/sched.h | 2 ++ arch/lkl/include/asm/syscalls.h | 1 - arch/lkl/include/asm/thread_info.h | 7 +++++ arch/lkl/include/uapi/asm/host_ops.h | 12 ++++++++ arch/lkl/include/uapi/asm/unistd.h | 2 ++ arch/lkl/kernel/syscalls.c | 40 +++++++++++++++++++++++++- arch/lkl/kernel/threads.c | 42 +++++++++++++++++++++++++--- 8 files changed, 101 insertions(+), 6 deletions(-) diff --git a/arch/lkl/Kconfig b/arch/lkl/Kconfig index 28019d31a2b212..02b053dfc5d184 100644 --- a/arch/lkl/Kconfig +++ b/arch/lkl/Kconfig @@ -35,6 +35,7 @@ config LKL select IPV6_ADVANCED_ROUTER select ARCH_NO_COHERENT_DMA_MMAP select HAVE_MEMBLOCK + select HAVE_COPY_THREAD_TLS select NO_BOOTMEM config OUTPUT_FORMAT diff --git a/arch/lkl/include/asm/sched.h b/arch/lkl/include/asm/sched.h index da7376363e6f97..be2bf1bc74baeb 100644 --- a/arch/lkl/include/asm/sched.h +++ b/arch/lkl/include/asm/sched.h @@ -25,4 +25,6 @@ struct task_struct* lkl_get_current_task_struct(void); int host_task_stub(void *unused); +extern struct lkl_tls_key *task_key; + #endif /* _ASM_LKL_SCHED_H */ diff --git a/arch/lkl/include/asm/syscalls.h b/arch/lkl/include/asm/syscalls.h index 333e9393ec3d3d..9ff23054a37ec4 100644 --- a/arch/lkl/include/asm/syscalls.h +++ b/arch/lkl/include/asm/syscalls.h @@ -18,7 +18,6 @@ void wakeup_idle_host_task(void); #define sys_mmap sys_mmap_pgoff #define sys_mmap2 sys_mmap_pgoff -#define sys_clone sys_ni_syscall #define sys_vfork sys_ni_syscall #define sys_rt_sigreturn sys_ni_syscall diff --git a/arch/lkl/include/asm/thread_info.h b/arch/lkl/include/asm/thread_info.h index 79a6b781d2c07a..1022ebd55815e7 100644 --- a/arch/lkl/include/asm/thread_info.h +++ b/arch/lkl/include/asm/thread_info.h @@ -23,6 +23,12 @@ struct thread_info { lkl_thread_t tid; struct task_struct *prev_sched; unsigned long stackend; + /* The return address from the currently executing syscall. Invalid when + * the thread is not executing a syscall. 
*/ + void *syscall_ret; + /* The task for any child that was created during syscall execution. Only + * valid on return from a clone-family syscall. */ + struct task_struct *cloned_child; }; #define INIT_THREAD_INFO(tsk) \ @@ -57,6 +63,7 @@ void threads_cleanup(void); #define TIF_SCHED_JB 7 #define TIF_HOST_THREAD 8 #define TIF_NO_TERMINATION 9 // Do not terminate LKL on exit +#define TIF_CLONED_HOST_THREAD 10 // This is a host thread created via a clone-family call. #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/lkl/include/uapi/asm/host_ops.h b/arch/lkl/include/uapi/asm/host_ops.h index c3cc05e11b0dcc..1b6360958cafff 100644 --- a/arch/lkl/include/uapi/asm/host_ops.h +++ b/arch/lkl/include/uapi/asm/host_ops.h @@ -56,6 +56,13 @@ struct ucontext; * * @thread_create - create a new thread and run f(arg) in its context; returns a * thread handle or 0 if the thread could not be created + * @thread_create_host - create a new thread as the result of a fork-like call + * and initialises its register set to the provided program counter, stack + * pointer, and TLS area; returns a thread handle or 0 if the thread could not + * be created + * @thread_destroy_host - destroys the state associated with a host thread that + * has exited via an exit system call. The task_key argument is the TLS + * variable containing the task. The destructor for this must not be run. * @thread_detach - on POSIX systems, free up resources held by * pthreads. Noop on Win32. 
* @thread_exit - terminates the current thread @@ -119,6 +126,11 @@ struct lkl_host_operations { void (*mutex_unlock)(struct lkl_mutex *mutex); lkl_thread_t (*thread_create)(void (*f)(void *), void *arg); + lkl_thread_t (*thread_create_host)(void* pc, void* sp, void* tls, + struct lkl_tls_key* task_key, void* task_value); + void (*thread_destroy_host)(lkl_thread_t tid, struct lkl_tls_key* + task_key); + void (*thread_detach)(void); void (*thread_exit)(void); int (*thread_join)(lkl_thread_t tid); diff --git a/arch/lkl/include/uapi/asm/unistd.h b/arch/lkl/include/uapi/asm/unistd.h index b30064b1e1857d..c3cf82d4995ab7 100644 --- a/arch/lkl/include/uapi/asm/unistd.h +++ b/arch/lkl/include/uapi/asm/unistd.h @@ -5,6 +5,8 @@ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT #define __ARCH_WANT_TIME32_SYSCALLS +#define __ARCH_WANT_SYS_CLONE + #include diff --git a/arch/lkl/kernel/syscalls.c b/arch/lkl/kernel/syscalls.c index 0c5038c94dc535..01b0a9b88a1808 100644 --- a/arch/lkl/kernel/syscalls.c +++ b/arch/lkl/kernel/syscalls.c @@ -116,7 +116,7 @@ static void del_host_task(void *arg) lkl_ops->jmp_buf_set(&ti->sched_jb, exit_task); } -static struct lkl_tls_key *task_key; +struct lkl_tls_key *task_key; /* Use this to record an ongoing LKL shutdown */ _Atomic(bool) lkl_shutdown = false; @@ -131,6 +131,7 @@ struct task_struct* lkl_get_current_task_struct(void) long lkl_syscall(long no, long *params) { struct task_struct *task = host0; + struct thread_info *ti; long ret; LKL_TRACE( @@ -172,6 +173,14 @@ long lkl_syscall(long no, long *params) } } + ti = task_thread_info(task); + /* + * Store the return address so that it can be used in clone and similar + * calls. In conventional arch ports, this would happen for free because + * the system call would capture the register state of the callee. 
+ */ + ti->syscall_ret = __builtin_return_address(0); + LKL_TRACE("switching to host task (no=%li task=%s current=%s)\n", no, task->comm, current->comm); @@ -185,6 +194,10 @@ long lkl_syscall(long no, long *params) LKL_TRACE("returned from run_syscall() (no=%li task=%s current=%s)\n", no, task->comm, current->comm); + /* + * Zero the return address so that nothing accidentally sees a stale value. + */ + ti->syscall_ret = 0; task_work_run(); /* @@ -201,6 +214,31 @@ long lkl_syscall(long no, long *params) } out: + /* + * If we have created a new host task, make sure that it isn't on the + * scheduler queue when we return. LKL expects that the only tasks driven + * by the Linux scheduler are kernel threads. If releasing the CPU lock + * entirely and there are runnable tasks, `lkl_cpu_put` may run the + * scheduler and not release the lock. The scheduler hands the CPU lock to + * the next running thread and `lkl_cpu_put` expects this to be the idle + * host task (which then releases the lock). If host tasks are scheduled, + * they will be left running (and owning the CPU lock) and `lkl_cpu_put` + * will return without anything having released the lock. LKL will then + * deadlock on the next system call. + */ + if (ti->cloned_child) + { + struct task_struct *child = ti->cloned_child; + ti->cloned_child = NULL; + /* + * We can't change the scheduler state of a task that isn't running, so + * switch to the task and then mark it as uninteruptible. + */ + switch_to_host_task(child); + child->state = TASK_UNINTERRUPTIBLE; + /* Switch back to the calling task before we return. 
*/ + switch_to_host_task(task); + } lkl_cpu_put(); LKL_TRACE("done (no=%li task=%s current=%s ret=%i)\n", no, diff --git a/arch/lkl/kernel/threads.c b/arch/lkl/kernel/threads.c index a5d1dc24db86e9..501eaa899047e7 100644 --- a/arch/lkl/kernel/threads.c +++ b/arch/lkl/kernel/threads.c @@ -74,14 +74,23 @@ static void kill_thread(struct thread_info *ti) lkl_ops->sem_up(ti->sched_sem); lkl_ops->thread_join(ti->tid); } else { - + /* + * If this is a task backing a host thread created by clone, then we + * need to destroy the associated host thread, but not exit LKL. + */ + if (test_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD)) { + clear_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD); + ti->dead = true; + BUG_ON(!lkl_ops->thread_destroy_host); + lkl_ops->thread_destroy_host(ti->tid, task_key); + ti->tid = 0; /* * Check if the host thread was killed due to its deallocation when * the associated application thread terminated gracefully. If not, * the thread has terminated due to a SYS_exit or a signal. In this * case, we need to notify the host to initiate an LKL shutdown. */ - if (!test_ti_thread_flag(ti, TIF_NO_TERMINATION)) { + } else if (!test_ti_thread_flag(ti, TIF_NO_TERMINATION)) { int exit_code = task->exit_code; int exit_status = exit_code >> 8; int received_signal = exit_code & 255; @@ -239,8 +248,8 @@ static void thread_bootstrap(void *_tba) do_exit(0); } -int copy_thread(unsigned long clone_flags, unsigned long esp, - unsigned long unused, struct task_struct *p) +int copy_thread_tls(unsigned long clone_flags, unsigned long esp, + unsigned long unused, struct task_struct *p, unsigned long tls) { LKL_TRACE("enter\n"); @@ -252,6 +261,31 @@ int copy_thread(unsigned long clone_flags, unsigned long esp, return 0; } + /* + * If we are creating a new userspace thread and are in the middle of a + * system call, create a new host thread coupled with this task. 
The + * second check is necessary because we also hit this path when lazily + * binding a host thread to a new task on system call entry. + */ + void *pc = task_thread_info(current)->syscall_ret; + if (pc && !(p->flags & PF_KTHREAD)) { + /* + * If we have host support for creating new threads with fine-grained + * control over their initial state, use it to create a new host + * thread. + */ + if (lkl_ops->thread_create_host) { + static unsigned long long clone_count = 0; + set_ti_thread_flag(ti, TIF_HOST_THREAD); + set_ti_thread_flag(ti, TIF_CLONED_HOST_THREAD); + ti->tid = lkl_ops->thread_create_host(pc, (void*)esp, (void*)tls, task_key, p); + snprintf(p->comm, sizeof(p->comm), "host_clone%llu", __sync_fetch_and_add(&clone_count, 1)); + current_thread_info()->cloned_child = p; + return (ti->tid == 0) ? -ENOMEM : 0; + } + return -ENODEV; + } + tba = kmalloc(sizeof(*tba), GFP_KERNEL); if (!tba) return -ENOMEM;