Skip to content

Commit

Permalink
Fix zvol+btrfs hang
Browse files Browse the repository at this point in the history
When using a zvol to back a btrfs filesystem the btrfs mount
would hang.  This was due to the bio completion callback used
in btrfs assuming that lower level drivers would never modify
the bio->bi_io_vecs after they were submitted via bio_submit().
If they are modified btrfs will miscalculate which pages need
to be unlocked resulting in a hang.

It's worth mentioning that other file systems such as ext[234]
and xfs work fine because they do not make the same assumption
in the bio completion callback.

The most straight forward way to fix the issue is to present
the semantics expected by btrfs.  This is done by cloning the
bios attached to each request and then using the clones bvecs
to perform the required accounting.  The clones are freed after
each read/write and the original unmodified bios are linked back
in to the request.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #469
  • Loading branch information
behlendorf committed Nov 8, 2012
1 parent bbf8c74 commit f6a8d27
Showing 1 changed file with 63 additions and 0 deletions.
63 changes: 63 additions & 0 deletions module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,11 +1022,57 @@ dmu_req_copy(void *arg_buf, int size, int *offset, struct request *req)
return 0;
}

static void
dmu_bio_put(struct bio *bio)
{
struct bio *bio_next;

while (bio) {
bio_next = bio->bi_next;
bio_put(bio);
bio = bio_next;
}
}

static int
dmu_bio_clone(struct bio *bio, struct bio **bio_copy)
{
struct bio *bio_root = NULL;
struct bio *bio_last = NULL;
struct bio *bio_new;

if (bio == NULL)
return EINVAL;

while (bio) {
bio_new = bio_clone(bio, GFP_NOIO);
if (bio_new == NULL) {
dmu_bio_put(bio_root);
return ENOMEM;
}

if (bio_last) {
bio_last->bi_next = bio_new;
bio_last = bio_new;
} else {
bio_root = bio_new;
bio_last = bio_new;
}

bio = bio->bi_next;
}

*bio_copy = bio_root;

return 0;
}

int
dmu_read_req(objset_t *os, uint64_t object, struct request *req)
{
uint64_t size = blk_rq_bytes(req);
uint64_t offset = blk_rq_pos(req) << 9;
struct bio *bio_saved = req->bio;
dmu_buf_t **dbp;
int numbufs, i, err;

Expand All @@ -1039,6 +1085,10 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
if (err)
return (err);

err = dmu_bio_clone(bio_saved, &req->bio);
if (err)
goto error;

for (i = 0; i < numbufs; i++) {
int tocpy, didcpy, bufoff;
dmu_buf_t *db = dbp[i];
Expand All @@ -1062,6 +1112,10 @@ dmu_read_req(objset_t *os, uint64_t object, struct request *req)
offset += didcpy;
err = 0;
}

dmu_bio_put(req->bio);
req->bio = bio_saved;
error:
dmu_buf_rele_array(dbp, numbufs, FTAG);

return (err);
Expand All @@ -1072,6 +1126,7 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
{
uint64_t size = blk_rq_bytes(req);
uint64_t offset = blk_rq_pos(req) << 9;
struct bio *bio_saved = req->bio;
dmu_buf_t **dbp;
int numbufs;
int err = 0;
Expand All @@ -1085,6 +1140,10 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
if (err)
return (err);

err = dmu_bio_clone(bio_saved, &req->bio);
if (err)
goto error;

for (i = 0; i < numbufs; i++) {
int tocpy, didcpy, bufoff;
dmu_buf_t *db = dbp[i];
Expand Down Expand Up @@ -1119,7 +1178,11 @@ dmu_write_req(objset_t *os, uint64_t object, struct request *req, dmu_tx_t *tx)
err = 0;
}

dmu_bio_put(req->bio);
req->bio = bio_saved;
error:
dmu_buf_rele_array(dbp, numbufs, FTAG);

return (err);
}

Expand Down

0 comments on commit f6a8d27

Please sign in to comment.