Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LibOS] fs_lock: Use a separate lock for all operations #283

Merged
merged 1 commit into from
Dec 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions LibOS/shim/include/shim_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,12 +161,11 @@ struct shim_dentry {
* filesystem (see `shim_inode` below). Protected by `lock`. */
struct shim_inode* inode;

/* File lock information, stored only in the main process. Protected by `lock`. See
* `shim_fs_lock.c`. */
/* File lock information, stored only in the main process. Managed by `shim_fs_lock.c`. */
struct fs_lock* fs_lock;

/* True if the file might have locks placed by current process. Used in processes other than
* main process, to prevent unnecessary IPC calls on handle close. Protected by `lock`. See
* main process, to prevent unnecessary IPC calls on handle close. Managed by
* `shim_fs_lock.c`. */
bool maybe_has_fs_locks;

Expand Down
119 changes: 36 additions & 83 deletions LibOS/shim/src/fs/shim_fs_lock.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
#include "shim_ipc.h"
#include "shim_lock.h"

/*
* Global lock for the whole subsystem. Protects access to `g_fs_lock_list`, and also to dentry
* fields (`fs_lock` and `maybe_has_fs_locks`).
*/
static struct shim_lock g_fs_lock_lock;

/*
* Describes a pending request for a POSIX lock. After processing the request, the object is
* removed, and a possible waiter is notified (see below).
Expand Down Expand Up @@ -46,10 +52,10 @@ struct fs_lock {
struct shim_dentry* dent;

/* POSIX locks, sorted by PID and then by start position (so that we are able to merge and split
* locks). The ranges do not overlap within a given PID. Protected by `dent->lock`. */
* locks). The ranges do not overlap within a given PID. */
LISTP_TYPE(posix_lock) posix_locks;

/* Pending requests. Protected by `dent->lock`. */
/* Pending requests. */
LISTP_TYPE(posix_lock_request) posix_lock_requests;

/* List node, for `g_fs_lock_list`. */
Expand All @@ -59,9 +65,6 @@ struct fs_lock {
/* Global list of `fs_lock` objects. Used for cleanup. */
static LISTP_TYPE(fs_lock) g_fs_lock_list = LISTP_INIT;

/* Global lock for `g_fs_lock_list`. */
static struct shim_lock g_fs_lock_lock;

int init_fs_lock(void) {
if (g_process_ipc_ids.leader_vmid)
return 0;
Expand All @@ -70,7 +73,7 @@ int init_fs_lock(void) {
}

static int find_fs_lock(struct shim_dentry* dent, bool create, struct fs_lock** out_fs_lock) {
assert(locked(&dent->lock));
assert(locked(&g_fs_lock_lock));
if (!dent->fs_lock && create) {
struct fs_lock* fs_lock = malloc(sizeof(*fs_lock));
if (!fs_lock)
Expand All @@ -81,9 +84,7 @@ static int find_fs_lock(struct shim_dentry* dent, bool create, struct fs_lock**
INIT_LISTP(&fs_lock->posix_lock_requests);
dent->fs_lock = fs_lock;

lock(&g_fs_lock_lock);
LISTP_ADD(fs_lock, &g_fs_lock_list, list);
unlock(&g_fs_lock_lock);
}
*out_fs_lock = dent->fs_lock;
return 0;
Expand All @@ -97,7 +98,7 @@ static int posix_lock_dump_write_all(const char* str, size_t size, void* arg) {

/* Log current locks for a file, for debugging purposes. */
static void posix_lock_dump(struct fs_lock* fs_lock) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));
struct print_buf buf = INIT_PRINT_BUF(&posix_lock_dump_write_all);
IDTYPE pid = 0;

Expand Down Expand Up @@ -130,16 +131,14 @@ static void posix_lock_dump(struct fs_lock* fs_lock) {

/* Removes `fs_lock` if it's not necessary (i.e. no locks are held or requested for a file). */
static void fs_lock_gc(struct fs_lock* fs_lock) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));
if (g_log_level >= LOG_LEVEL_TRACE)
posix_lock_dump(fs_lock);
if (LISTP_EMPTY(&fs_lock->posix_locks) && LISTP_EMPTY(&fs_lock->posix_lock_requests)) {
struct shim_dentry* dent = fs_lock->dent;
dent->fs_lock = NULL;

lock(&g_fs_lock_lock);
LISTP_DEL(fs_lock, &g_fs_lock_list, list);
unlock(&g_fs_lock_lock);

put_dentry(dent);
free(fs_lock);
Expand All @@ -151,7 +150,7 @@ static void fs_lock_gc(struct fs_lock* fs_lock) {
* ranges overlap, and at least one of them is a write lock.
*/
static struct posix_lock* posix_lock_find_conflict(struct fs_lock* fs_lock, struct posix_lock* pl) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));
assert(pl->type != F_UNLCK);

struct posix_lock* cur;
Expand All @@ -164,12 +163,12 @@ static struct posix_lock* posix_lock_find_conflict(struct fs_lock* fs_lock, stru
}

/*
* Add a new lock request. Before releasing `fs_lock->dent->lock`, the caller has to initialize the
* Add a new lock request. Before releasing `g_fs_lock_lock`, the caller has to initialize the
* `notify` part of the request (see `struct posix_lock_request` above).
*/
static int posix_lock_add_request(struct fs_lock* fs_lock, struct posix_lock* pl,
struct posix_lock_request** out_req) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));
assert(pl->type != F_UNLCK);

struct posix_lock_request* req = malloc(sizeof(*req));
Expand All @@ -189,7 +188,7 @@ static int posix_lock_add_request(struct fs_lock* fs_lock, struct posix_lock* pl
* See also Linux sources (`fs/locks.c`) for a similar implementation.
*/
static int _posix_lock_set(struct fs_lock* fs_lock, struct posix_lock* pl) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));

/* Preallocate new locks first, so that we don't fail after modifying something. */

Expand Down Expand Up @@ -355,7 +354,7 @@ static int _posix_lock_set(struct fs_lock* fs_lock, struct posix_lock* pl) {
* TODO: This is pretty inefficient, but perhaps good enough for now...
*/
static void posix_lock_process_requests(struct fs_lock* fs_lock) {
assert(locked(&fs_lock->dent->lock));
assert(locked(&g_fs_lock_lock));

bool changed;
do {
Expand Down Expand Up @@ -397,7 +396,7 @@ static void posix_lock_process_requests(struct fs_lock* fs_lock) {
* request (if `wait` is true). */
static int posix_lock_set_or_add_request(struct shim_dentry* dent, struct posix_lock* pl, bool wait,
struct posix_lock_request** out_req) {
assert(locked(&dent->lock));
assert(locked(&g_fs_lock_lock));

struct fs_lock* fs_lock = NULL;
int ret = find_fs_lock(dent, /*create=*/pl->type != F_UNLCK, &fs_lock);
Expand Down Expand Up @@ -443,15 +442,15 @@ int posix_lock_set(struct shim_dentry* dent, struct posix_lock* pl, bool wait) {
if (g_process_ipc_ids.leader_vmid) {
/* In the IPC version, we use `dent->maybe_has_fs_locks` to short-circuit unlocking files
* that we never locked. This is to prevent unnecessary IPC calls on a handle. */
lock(&dent->lock);
lock(&g_fs_lock_lock);
if (pl->type == F_RDLCK || pl->type == F_WRLCK) {
dent->maybe_has_fs_locks = true;
} else if (!dent->maybe_has_fs_locks) {
/* We know we're not holding any locks for the file */
unlock(&dent->lock);
unlock(&g_fs_lock_lock);
return 0;
}
unlock(&dent->lock);
unlock(&g_fs_lock_lock);

char* path;
ret = dentry_abs_path(dent, &path, /*size=*/NULL);
Expand All @@ -463,7 +462,7 @@ int posix_lock_set(struct shim_dentry* dent, struct posix_lock* pl, bool wait) {
return ret;
}

lock(&dent->lock);
lock(&g_fs_lock_lock);

PAL_HANDLE event = NULL;
struct posix_lock_request* req = NULL;
Expand All @@ -483,9 +482,9 @@ int posix_lock_set(struct shim_dentry* dent, struct posix_lock* pl, bool wait) {
req->notify.event = event;
req->notify.result = &result;

unlock(&dent->lock);
unlock(&g_fs_lock_lock);
ret = object_wait_with_retry(event);
lock(&dent->lock);
lock(&g_fs_lock_lock);
if (ret < 0)
goto out;

Expand All @@ -494,7 +493,7 @@ int posix_lock_set(struct shim_dentry* dent, struct posix_lock* pl, bool wait) {
ret = 0;
}
out:
unlock(&dent->lock);
unlock(&g_fs_lock_lock);
if (event)
DkObjectClose(event);
return ret;
Expand All @@ -515,9 +514,9 @@ int posix_lock_set_from_ipc(const char* path, struct posix_lock* pl, bool wait,
goto out;
}

lock(&dent->lock);
lock(&g_fs_lock_lock);
ret = posix_lock_set_or_add_request(dent, pl, wait, &req);
unlock(&dent->lock);
unlock(&g_fs_lock_lock);
if (ret < 0)
goto out;

Expand Down Expand Up @@ -556,7 +555,7 @@ int posix_lock_get(struct shim_dentry* dent, struct posix_lock* pl, struct posix
return ret;
}

lock(&dent->lock);
lock(&g_fs_lock_lock);

struct fs_lock* fs_lock = NULL;
ret = find_fs_lock(dent, /*create=*/false, &fs_lock);
Expand All @@ -580,7 +579,7 @@ int posix_lock_get(struct shim_dentry* dent, struct posix_lock* pl, struct posix
if (fs_lock)
fs_lock_gc(fs_lock);

unlock(&dent->lock);
unlock(&g_fs_lock_lock);
return ret;
}

Expand All @@ -601,39 +600,9 @@ int posix_lock_get_from_ipc(const char* path, struct posix_lock* pl, struct posi
return ret;
}

/* Create an array with all the dentries that currently have locks. Takes a reference to the
* dentries. */
static int collect_dents_with_locks(size_t* out_count, struct shim_dentry*** out_dents) {
assert(locked(&g_fs_lock_lock));

struct fs_lock* fs_lock;

size_t count = 0;
LISTP_FOR_EACH_ENTRY(fs_lock, &g_fs_lock_list, list) {
count++;
}
struct shim_dentry** dents = malloc(count * sizeof(dents[0]));
if (!dents)
return -ENOMEM;

size_t i = 0;
LISTP_FOR_EACH_ENTRY(fs_lock, &g_fs_lock_list, list) {
/* It's safe to access `fs_lock->dent`: it doesn't change, `fs_lock` will not be deallocated
* before removing from list, and the reference for `dent` will not be dropped before
* removing `fs_lock`. */
get_dentry(fs_lock->dent);
dents[i++] = fs_lock->dent;
}
assert(i == count);

*out_count = count;
*out_dents = dents;
return 0;
}

/* Removes all locks and lock requests for a given PID and dentry. */
static int posix_lock_clear_pid_from_dentry(struct shim_dentry* dent, IDTYPE pid) {
assert(locked(&dent->lock));
assert(locked(&g_fs_lock_lock));

struct fs_lock* fs_lock;
int ret = find_fs_lock(dent, /*create=*/false, &fs_lock);
Expand Down Expand Up @@ -681,37 +650,21 @@ int posix_lock_clear_pid(IDTYPE pid) {

log_debug("clearing POSIX locks for pid %d", pid);

/*
* Note that we cannot traverse `g_fs_lock_list` processing the elements along the way, because
* we always take the locks for individual dentries *before* the global lock.
*
* Instead, we first traverse the list to collect the dentries, and then process them. Even if
* the list of locks changes in the meantime, at this point no new locks for the given PID will
* appear.
*/

size_t count;
struct shim_dentry** dents;
int ret;

lock(&g_fs_lock_lock);
ret = collect_dents_with_locks(&count, &dents);
unlock(&g_fs_lock_lock);
if (ret < 0)
return ret;
struct fs_lock* fs_lock;
struct fs_lock* fs_lock_tmp;

for (size_t i = 0; i < count; i++) {
lock(&dents[i]->lock);
ret = posix_lock_clear_pid_from_dentry(dents[i], pid);
unlock(&dents[i]->lock);
lock(&g_fs_lock_lock);
LISTP_FOR_EACH_ENTRY_SAFE(fs_lock, fs_lock_tmp, &g_fs_lock_list, list) {
/* Note that the below call might end up deleting `fs_lock` */
ret = posix_lock_clear_pid_from_dentry(fs_lock->dent, pid);
if (ret < 0)
goto out;
}

ret = 0;
out:
for (size_t i = 0; i < count; i++)
put_dentry(dents[i]);
free(dents);
unlock(&g_fs_lock_lock);
return ret;
}