MMP delay logic changes #7068

Closed
wants to merge 2 commits into from
1 change: 0 additions & 1 deletion include/sys/mmp.h
@@ -30,7 +30,6 @@ extern "C" {
#define MMP_MIN_INTERVAL 100 /* ms */
#define MMP_DEFAULT_INTERVAL 1000 /* ms */
#define MMP_DEFAULT_IMPORT_INTERVALS 10
-#define MMP_DEFAULT_FAIL_INTERVALS 5

typedef struct mmp_thread {
kmutex_t mmp_thread_lock; /* protect thread mgmt fields */
60 changes: 41 additions & 19 deletions module/zfs/mmp.c
@@ -115,14 +115,13 @@ uint_t zfs_multihost_import_intervals = MMP_DEFAULT_IMPORT_INTERVALS;
* configuration may take action such as suspending the pool or taking a
* device offline.
*
- * When zfs_multihost_fail_intervals > 0 then sequential mmp write failures will
- * cause the pool to be suspended. This occurs when
+ * When zfs_multihost_fail_intervals > 0 then sequential mmp write failures
+ * will cause the pool to be suspended. This occurs when
* zfs_multihost_fail_intervals * zfs_multihost_interval milliseconds have
* passed since the last successful mmp write. This guarantees the activity
- * test will see mmp writes if the
- * pool is imported.
+ * test will see mmp writes if the pool is imported.
*/
-uint_t zfs_multihost_fail_intervals = MMP_DEFAULT_FAIL_INTERVALS;
+uint_t zfs_multihost_fail_intervals = MMP_DEFAULT_IMPORT_INTERVALS / 2;

static void mmp_thread(void *arg);
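The rule in the comment above reduces to simple arithmetic. Below is a minimal sketch of it, not part of the patch: `mmp_should_suspend`, `now_ms`, and `last_write_ms` are hypothetical names, and the in-tree check actually runs in mmp_thread() on nanosecond timestamps.

```c
#include <stdbool.h>
#include <stdint.h>

/*
 * Editor's sketch of the suspension rule described above; the names
 * are hypothetical stand-ins, and the real check uses hrtime_t ns.
 */
static bool
mmp_should_suspend(uint64_t now_ms, uint64_t last_write_ms,
    uint64_t fail_intervals, uint64_t interval_ms)
{
	/* fail_intervals == 0 disables suspension on write failure */
	if (fail_intervals == 0)
		return (false);
	return (now_ms - last_write_ms > fail_intervals * interval_ms);
}
```

At the new default of MMP_DEFAULT_IMPORT_INTERVALS / 2 = 5, the value matches the deleted MMP_DEFAULT_FAIL_INTERVALS, so default behavior is unchanged: with the default 1000 ms interval the pool suspends after about 5 seconds of failed writes, half the 10-second activity test a peer would run with the same settings.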

@@ -249,7 +248,7 @@ mmp_write_done(zio_t *zio)
goto unlock;

/*
- * Mmp writes are queued on a fixed schedule, but under many
+ * MMP writes are queued on a fixed schedule, but under many
* circumstances, such as a busy device or faulty hardware,
* the writes will complete at variable, much longer,
* intervals. In these cases, another node checking for
@@ -269,8 +268,7 @@ mmp_write_done(zio_t *zio)
 		if (delay > mts->mmp_delay)
 			mts->mmp_delay = delay;
 		else
-			mts->mmp_delay = (delay + mts->mmp_delay * 127) /
-			    128;
+			mts->mmp_delay = (delay + mts->mmp_delay * 127) / 128;
 	} else {
 		mts->mmp_delay = 0;
 	}
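The update above is deliberately asymmetric: a measured latency above the running mmp_delay replaces it at once, while lower values only pull it down at 1/128 weight per write. A sketch of that update as a hypothetical standalone function:

```c
#include <stdint.h>

/* Editor's sketch: a hypothetical standalone form of the update above. */
static uint64_t
mmp_delay_update(uint64_t mmp_delay, uint64_t observed_delay)
{
	if (observed_delay > mmp_delay)
		return (observed_delay);	/* widen immediately on a stall */
	/* narrow slowly: it takes ~128 fast writes to forget one slow one */
	return ((observed_delay + mmp_delay * 127) / 128);
}
```

Since mmp_delay is recorded in the uberblock and feeds the activity test on an importing node, it is safer for the advertised value to rise fast and fall slowly.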
@@ -381,16 +379,40 @@ mmp_thread(void *arg)
 	uint64_t mmp_fail_intervals = zfs_multihost_fail_intervals;
 	uint64_t mmp_interval = MSEC2NSEC(
 	    MAX(zfs_multihost_interval, MMP_MIN_INTERVAL));
+	uint64_t fail_interval;
+	int vdev_leaves = MAX(vdev_count_leaves(spa), 1);
 	boolean_t suspended = spa_suspended(spa);
 	boolean_t multihost = spa_multihost(spa);
 	hrtime_t start, next_time;

 	start = gethrtime();
-	if (multihost) {
-		next_time = start + mmp_interval /
-		    MAX(vdev_count_leaves(spa), 1);
-	} else {
+	if (multihost)
+		next_time = start + mmp_interval / vdev_leaves;
+	else
 		next_time = start + MSEC2NSEC(MMP_DEFAULT_INTERVAL);
-	}

+	/*
+	 * Don't allow fail_intervals larger than import_intervals,
+	 * as this could allow an active pool to be imported on a peer.
+	 *
+	 * We don't actually know what the import_interval is on the
+	 * peer node, but the best assumption is that it is configured
+	 * in the same way as the local node.
+	 */
+	if (mmp_fail_intervals >= zfs_multihost_import_intervals) {
+		static boolean_t warned = B_FALSE;
+
+		if (!warned) {
+			cmn_err(CE_WARN,
+			    "zfs_multihost_fail_intervals %u >= "
+			    "zfs_multihost_import_interval %u, "
+			    "using smaller value",
+			    zfs_multihost_fail_intervals,
+			    zfs_multihost_import_interval);


Review comment: zfs_multihost_import_interval -> zfs_multihost_import_intervals ?

+			warned = B_TRUE;
+		}
+
+		mmp_fail_intervals = zfs_multihost_import_intervals - 1;
+	}
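The clamp keeps the local suspension window strictly shorter than a peer's activity test. A minimal sketch of that invariant, assuming (as the comment above does) that both nodes run the same tunables and that import_intervals is at least 1; the function name is hypothetical:

```c
#include <stdint.h>

/* Editor's sketch of the invariant enforced above. */
static uint64_t
clamp_fail_intervals(uint64_t fail_intervals, uint64_t import_intervals)
{
	/*
	 * If the local host only suspends after fail_intervals writes
	 * worth of time, a peer watching for import_intervals writes
	 * must outwait it, or both could believe they own the pool.
	 */
	if (fail_intervals >= import_intervals)
		return (import_intervals - 1);
	return (fail_intervals);
}
```

At the defaults (5 and 10) the clamp never fires; it only bites if zfs_multihost_fail_intervals is raised to the import interval count or beyond without also raising zfs_multihost_import_intervals.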

/*
@@ -417,16 +439,16 @@
* immediately suspended before writes can occur at the new
* higher frequency.
*/
-		if ((mmp_interval * mmp_fail_intervals) < max_fail_ns) {
-			max_fail_ns = ((31 * max_fail_ns) + (mmp_interval *
-			    mmp_fail_intervals)) / 32;
-		} else {
-			max_fail_ns = mmp_interval * mmp_fail_intervals;
-		}
+		fail_interval = MAX(mmp_interval,
+		    mmp->mmp_delay * vdev_leaves) * mmp_fail_intervals;
+		if (fail_interval < max_fail_ns)
+			max_fail_ns = ((31 * max_fail_ns) + fail_interval) / 32;
+		else
+			max_fail_ns = fail_interval;

/*
* Suspend the pool if no MMP write has succeeded in over
- * mmp_interval * mmp_fail_intervals nanoseconds.
+ * mmp_delay * vdev_leaves * mmp_fail_intervals nanoseconds.
*/
if (!suspended && mmp_fail_intervals && multihost &&
(start - mmp->mmp_last_write) > max_fail_ns) {
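Taken together, the new window is driven by the larger of the configured interval and the observed whole-pool write period (mmp_delay times the leaf vdev count), and it shrinks only through a 1/32-weight moving average while growing immediately. A standalone sketch under the same hedges as above, with MAX defined locally in place of the kernel macro:

```c
#include <stdint.h>

#define	MAX(a, b)	((a) > (b) ? (a) : (b))

/* Editor's sketch: a hypothetical standalone form of the update above. */
static uint64_t
max_fail_update(uint64_t max_fail_ns, uint64_t mmp_interval_ns,
    uint64_t mmp_delay_ns, uint64_t vdev_leaves, uint64_t fail_intervals)
{
	uint64_t fail_interval = MAX(mmp_interval_ns,
	    mmp_delay_ns * vdev_leaves) * fail_intervals;

	if (fail_interval < max_fail_ns)	/* shrink gradually */
		return ((31 * max_fail_ns + fail_interval) / 32);
	return (fail_interval);			/* grow immediately */
}
```

The practical effect is that a pool whose MMP writes complete slowly is not suspended merely for being slow: the suspension window stretches with the measured cadence, just as an importing peer stretches its activity test using the delay recorded in the uberblock.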