Skip to content
This repository has been archived by the owner on Feb 26, 2020. It is now read-only.

Commit

Permalink
Implement a proper rw_tryupgrade
Browse files Browse the repository at this point in the history
Current rw_tryupgrade does rw_exit and then rw_tryenter(RW_RWITER), and then
does rw_enter(RW_READER) if it fails. This violate the assumption that
rw_tryupgrade should be atomic and could cause extra contention or even lock
inversion.

This patch we implement a proper rw_tryupgrade. For rwsem-spinlock, we take
the spinlock to check rwsem->count and rwsem->wait_list. For normal rwsem, we
use cmpxchg on rwsem->count to change the value from single reader to single
writer.

Signed-off-by: Chunwei Chen <david.chen@osnexus.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Tim Chase <tim@chase2k.com>
Closes openzfs/zfs#4692
Closes #554
  • Loading branch information
Chunwei Chen authored and behlendorf committed May 31, 2016
1 parent c60a51b commit f58040c
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 13 deletions.
25 changes: 25 additions & 0 deletions config/spl-build.m4
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
SPL_AC_2ARGS_ZLIB_DEFLATE_WORKSPACESIZE
SPL_AC_SHRINK_CONTROL_STRUCT
SPL_AC_RWSEM_SPINLOCK_IS_RAW
SPL_AC_RWSEM_ACTIVITY
SPL_AC_SCHED_RT_HEADER
SPL_AC_2ARGS_VFS_GETATTR
SPL_AC_USLEEP_RANGE
Expand Down Expand Up @@ -1316,6 +1317,30 @@ AC_DEFUN([SPL_AC_RWSEM_SPINLOCK_IS_RAW], [
EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # 3.16 API Change
dnl #
dnl # rwsem-spinlock "->activity" changed to "->count"
dnl #
AC_DEFUN([SPL_AC_RWSEM_ACTIVITY], [
AC_MSG_CHECKING([whether struct rw_semaphore has member activity])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
#include <linux/rwsem.h>
],[
struct rw_semaphore dummy_semaphore __attribute__ ((unused));
dummy_semaphore.activity = 0;
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RWSEM_ACTIVITY, 1,
[struct rw_semaphore has member activity])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])

dnl #
dnl # 3.9 API change,
dnl # Moved things from linux/sched.h to linux/sched/rt.h
Expand Down
17 changes: 17 additions & 0 deletions include/linux/rwsem_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@

#include <linux/rwsem.h>

#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
#else
#define SPL_RWSEM_SINGLE_READER_VALUE (RWSEM_ACTIVE_READ_BIAS)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (RWSEM_ACTIVE_WRITE_BIAS)
#endif

/* Linux 3.16 change activity to count for rwsem-spinlock */
#ifdef HAVE_RWSEM_ACTIVITY
#define RWSEM_COUNT(sem) sem->activity
#else
#define RWSEM_COUNT(sem) sem->count
#endif

int rwsem_tryupgrade(struct rw_semaphore *rwsem);

#if defined(RWSEM_SPINLOCK_IS_RAW)
#define spl_rwsem_lock_irqsave(lk, fl) raw_spin_lock_irqsave(lk, fl)
#define spl_rwsem_unlock_irqrestore(lk, fl) raw_spin_unlock_irqrestore(lk, fl)
Expand Down
11 changes: 4 additions & 7 deletions include/sys/rwlock.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,13 +223,10 @@ RW_LOCK_HELD(krwlock_t *rwp)
if (RW_WRITE_HELD(rwp)) { \
_rc_ = 1; \
} else { \
rw_exit(rwp); \
if (rw_tryenter(rwp, RW_WRITER)) { \
_rc_ = 1; \
} else { \
rw_enter(rwp, RW_READER); \
_rc_ = 0; \
} \
spl_rw_lockdep_off_maybe(rwp); \
if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \
spl_rw_set_owner(rwp); \
spl_rw_lockdep_on_maybe(rwp); \
} \
_rc_; \
})
Expand Down
41 changes: 41 additions & 0 deletions module/spl/spl-rwlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,46 @@

#define DEBUG_SUBSYSTEM S_RWLOCK

#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
int ret = 0;
unsigned long flags;
spl_rwsem_lock_irqsave(&rwsem->wait_lock, flags);
if (RWSEM_COUNT(rwsem) == SPL_RWSEM_SINGLE_READER_VALUE &&
list_empty(&rwsem->wait_list)) {
ret = 1;
RWSEM_COUNT(rwsem) = SPL_RWSEM_SINGLE_WRITER_VALUE;
}
spl_rwsem_unlock_irqrestore(&rwsem->wait_lock, flags);
return (ret);
}
#else
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
typeof (rwsem->count) val;
val = cmpxchg(&rwsem->count, SPL_RWSEM_SINGLE_READER_VALUE,
SPL_RWSEM_SINGLE_WRITER_VALUE);
return (val == SPL_RWSEM_SINGLE_READER_VALUE);
}
#endif

int
rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
if (__rwsem_tryupgrade(rwsem)) {
rwsem_release(&rwsem->dep_map, 1, _RET_IP_);
rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
rwsem->owner = current;
#endif
return (1);
}
return (0);
}
EXPORT_SYMBOL(rwsem_tryupgrade);

int spl_rw_init(void) { return 0; }
void spl_rw_fini(void) { }
66 changes: 60 additions & 6 deletions module/splat/splat-rwlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,12 @@
#define SPLAT_RWLOCK_TEST5_DESC "Write downgrade"

#define SPLAT_RWLOCK_TEST6_ID 0x0706
#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade"
#define SPLAT_RWLOCK_TEST6_DESC "Read upgrade"
#define SPLAT_RWLOCK_TEST6_NAME "rw_tryupgrade-1"
#define SPLAT_RWLOCK_TEST6_DESC "rwsem->count value"

#define SPLAT_RWLOCK_TEST7_ID 0x0707
#define SPLAT_RWLOCK_TEST7_NAME "rw_tryupgrade-2"
#define SPLAT_RWLOCK_TEST7_DESC "Read upgrade"

#define SPLAT_RWLOCK_TEST_MAGIC 0x115599DDUL
#define SPLAT_RWLOCK_TEST_NAME "rwlock_test"
Expand Down Expand Up @@ -580,8 +584,55 @@ splat_rwlock_test6(struct file *file, void *arg)
splat_init_rw_priv(rwp, file);

rw_enter(&rwp->rw_rwlock, RW_READER);
if (!RW_READ_HELD(&rwp->rw_rwlock)) {
if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
SPL_RWSEM_SINGLE_READER_VALUE) {
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
"We assumed single reader rwsem->count "
"should be %ld, but is %ld\n",
SPL_RWSEM_SINGLE_READER_VALUE,
RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
rc = -ENOLCK;
goto out;
}
rw_exit(&rwp->rw_rwlock);

rw_enter(&rwp->rw_rwlock, RW_WRITER);
if (RWSEM_COUNT(SEM(&rwp->rw_rwlock)) !=
SPL_RWSEM_SINGLE_WRITER_VALUE) {
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
"We assumed single writer rwsem->count "
"should be %ld, but is %ld\n",
SPL_RWSEM_SINGLE_WRITER_VALUE,
RWSEM_COUNT(SEM(&rwp->rw_rwlock)));
rc = -ENOLCK;
goto out;
}
rc = 0;
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
"rwsem->count same as we assumed\n");
out:
rw_exit(&rwp->rw_rwlock);
rw_destroy(&rwp->rw_rwlock);
kfree(rwp);

return rc;
}

static int
splat_rwlock_test7(struct file *file, void *arg)
{
rw_priv_t *rwp;
int rc;

rwp = (rw_priv_t *)kmalloc(sizeof(*rwp), GFP_KERNEL);
if (rwp == NULL)
return -ENOMEM;

splat_init_rw_priv(rwp, file);

rw_enter(&rwp->rw_rwlock, RW_READER);
if (!RW_READ_HELD(&rwp->rw_rwlock)) {
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
"rwlock should be read lock: %d\n",
RW_READ_HELD(&rwp->rw_rwlock));
rc = -ENOLCK;
Expand All @@ -591,23 +642,23 @@ splat_rwlock_test6(struct file *file, void *arg)
/* With one reader upgrade should never fail. */
rc = rw_tryupgrade(&rwp->rw_rwlock);
if (!rc) {
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME,
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME,
"rwlock failed upgrade from reader: %d\n",
RW_READ_HELD(&rwp->rw_rwlock));
rc = -ENOLCK;
goto out;
}

if (RW_READ_HELD(&rwp->rw_rwlock) || !RW_WRITE_HELD(&rwp->rw_rwlock)) {
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "rwlock should "
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "rwlock should "
"have 0 (not %d) reader and 1 (not %d) writer\n",
RW_READ_HELD(&rwp->rw_rwlock),
RW_WRITE_HELD(&rwp->rw_rwlock));
goto out;
}

rc = 0;
splat_vprint(file, SPLAT_RWLOCK_TEST6_NAME, "%s",
splat_vprint(file, SPLAT_RWLOCK_TEST7_NAME, "%s",
"rwlock properly upgraded\n");
out:
rw_exit(&rwp->rw_rwlock);
Expand Down Expand Up @@ -646,6 +697,8 @@ splat_rwlock_init(void)
SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5);
SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6);
SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7);

return sub;
}
Expand All @@ -654,6 +707,7 @@ void
splat_rwlock_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID);
SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID);
Expand Down

0 comments on commit f58040c

Please sign in to comment.