Skip to content

Commit

Permalink
ZTS: Fix zpool_reopen_001_pos (DEBUG)
Browse files Browse the repository at this point in the history
Update the vdev_disk_open() retry logic to retry for a specified number
of milliseconds, rather than a fixed number of attempts, to be more
robust. Additionally, log both the time waited and the requested
timeout to the internal log for debugging.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
behlendorf committed Dec 4, 2019
1 parent 12395c7 commit f76f598
Showing 1 changed file with 24 additions and 9 deletions.
33 changes: 24 additions & 9 deletions module/os/linux/zfs/vdev_disk.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,13 @@

/* Holder passed to blkdev_get_by_path() when opening with FMODE_EXCL. */
static void *zfs_vdev_holder = VDEV_HOLDER;

/*
* Wait up to zfs_vdev_open_timeout_ms milliseconds before determining the
* device is missing. The missing path may be transient since the links
* can be briefly removed and recreated in response to udev events.
*
* vdev_disk_open() only keeps retrying while the open fails with -ENOENT,
* requesting a 10ms sleep between attempts; any other error ends the wait
* immediately. When the partition table was successfully re-read just
* before the open, twice this value is used as the limit.
*/
static unsigned zfs_vdev_open_timeout_ms = 500;

/* size of the "reserved" partition, in blocks */
#define EFI_MIN_RESV_SIZE (16 * 1024)

Expand Down Expand Up @@ -146,8 +153,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
{
struct block_device *bdev;
fmode_t mode = vdev_bdev_mode(spa_mode(v->vdev_spa));
int count = 0, block_size;
int bdev_retry_count = 50;
hrtime_t timeout = MSEC2NSEC(zfs_vdev_open_timeout_ms);
vdev_disk_t *vd;

/* Must have a pathname and it must be absolute. */
Expand All @@ -162,7 +168,7 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
* partition force re-scanning the partition table while closed
* in order to get an accurate updated block device size. Then
* since udev may need to recreate the device links increase the
* open retry count before reporting the device as unavailable.
* open retry timeout before reporting the device as unavailable.
*/
vd = v->vdev_tsd;
if (vd) {
Expand All @@ -188,8 +194,10 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
if (!IS_ERR(bdev)) {
int error = vdev_bdev_reread_part(bdev);
blkdev_put(bdev, mode | FMODE_EXCL);
if (error == 0)
bdev_retry_count = 100;
if (error == 0) {
timeout = MSEC2NSEC(
zfs_vdev_open_timeout_ms * 2);
}
}
}
} else {
Expand Down Expand Up @@ -222,21 +230,23 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
* and it is reasonable to sleep and retry before giving up. In
* practice delays have been observed to be on the order of 100ms.
*/
hrtime_t start = gethrtime();
bdev = ERR_PTR(-ENXIO);
while (IS_ERR(bdev) && count < bdev_retry_count) {
while (IS_ERR(bdev) && ((gethrtime() - start) < timeout)) {
bdev = blkdev_get_by_path(v->vdev_path, mode | FMODE_EXCL,
zfs_vdev_holder);
if (unlikely(PTR_ERR(bdev) == -ENOENT)) {
schedule_timeout(MSEC_TO_TICK(10));
count++;
} else if (IS_ERR(bdev)) {
break;
}
}

if (IS_ERR(bdev)) {
int error = -PTR_ERR(bdev);
vdev_dbgmsg(v, "open error=%d count=%d", error, count);
vdev_dbgmsg(v, "open error=%d timeout=%llu/%llu", error,
(u_longlong_t)(gethrtime() - start),
(u_longlong_t)timeout);
vd->vd_bdev = NULL;
v->vdev_tsd = vd;
rw_exit(&vd->vd_lock);
Expand All @@ -247,10 +257,15 @@ vdev_disk_open(vdev_t *v, uint64_t *psize, uint64_t *max_psize,
rw_exit(&vd->vd_lock);
}

/* DEBUG: To be removed in the final version. */
vdev_dbgmsg(v, "open timeout=%llu/%llu",
(u_longlong_t)(gethrtime() - start),
(u_longlong_t)timeout);

struct request_queue *q = bdev_get_queue(vd->vd_bdev);

/* Determine the physical block size */
block_size = bdev_physical_block_size(vd->vd_bdev);
int block_size = bdev_physical_block_size(vd->vd_bdev);

/* Clear the nowritecache bit, causes vdev_reopen() to try again. */
v->vdev_nowritecache = B_FALSE;
Expand Down

0 comments on commit f76f598

Please sign in to comment.