Fix kernel panic induced by redacted send #11297

Merged · 6 commits · Dec 11, 2020
Changes from 3 commits
139 changes: 54 additions & 85 deletions module/zfs/dsl_bookmark.c
@@ -1561,33 +1561,6 @@ dsl_bookmark_latest_txg(dsl_dataset_t *ds)
return (dbn->dbn_phys.zbm_creation_txg);
}

static inline unsigned int
redact_block_buf_num_entries(unsigned int size)
{
return (size / sizeof (redact_block_phys_t));
}

/*
* This function calculates the offset of the last entry in the array of
* redact_block_phys_t. If we're reading the redaction list into buffers of
* size bufsize, then for all but the last buffer, the last valid entry in the
* array will be the last entry in the array. However, for the last buffer, any
* amount of it may be filled. Thus, we check to see if we're looking at the
* last buffer in the redaction list, and if so, we return the total number of
* entries modulo the number of entries per buffer. Otherwise, we return the
* number of entries per buffer minus one.
*/
static inline unsigned int
last_entry(redaction_list_t *rl, unsigned int bufsize, uint64_t bufid)
{
if (bufid == (rl->rl_phys->rlp_num_entries - 1) /
redact_block_buf_num_entries(bufsize)) {
return ((rl->rl_phys->rlp_num_entries - 1) %
redact_block_buf_num_entries(bufsize));
}
return (redact_block_buf_num_entries(bufsize) - 1);
}
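For context, a rough worked example of what these removed helpers computed (the 128 KiB buffer size and 32-byte entry size here are illustrative assumptions by the editor, not values taken from the diff):

/*
 * Illustrative only: with bufsize = 131072 (SPA_OLD_MAXBLOCKSIZE, 128 KiB)
 * and a 32-byte redact_block_phys_t, redact_block_buf_num_entries() would
 * return 131072 / 32 = 4096 entries per buffer.  For a redaction list with
 * rlp_num_entries = 10000, the last buffer is (10000 - 1) / 4096 = 2, so
 * last_entry() returns 4095 for buffers 0 and 1 (completely full) and
 * (10000 - 1) % 4096 = 1807 for buffer 2, the partially filled tail.
 */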

/*
* Compare the redact_block_phys_t to the bookmark. If the last block in the
* redact_block_phys_t is before the bookmark, return -1. If the first block in
@@ -1633,8 +1606,6 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
rl_traverse_callback_t cb, void *arg)
{
objset_t *mos = rl->rl_mos;
redact_block_phys_t *buf;
unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
int err = 0;

if (rl->rl_phys->rlp_last_object != UINT64_MAX ||
@@ -1651,42 +1622,47 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
}

/*
* Binary search for the point to resume from. The goal is to minimize
* the number of disk reads we have to perform.
* This allows us to skip the binary search and resume checking logic
* below, if we're not resuming a redacted send.
*/
buf = zio_data_buf_alloc(bufsize);
uint64_t maxbufid = (rl->rl_phys->rlp_num_entries - 1) /
redact_block_buf_num_entries(bufsize);
uint64_t minbufid = 0;
while (resume != NULL && maxbufid - minbufid >= 1) {
ASSERT3U(maxbufid, >, minbufid);
uint64_t midbufid = minbufid + ((maxbufid - minbufid) / 2);
err = dmu_read(mos, rl->rl_object, midbufid * bufsize, bufsize,
buf, DMU_READ_NO_PREFETCH);
if (ZB_IS_ZERO(resume))
resume = NULL;

/*
* Binary search for the point to resume from.
*/
uint64_t maxidx = rl->rl_phys->rlp_num_entries - 1;
uint64_t minidx = 0;
while (resume != NULL && maxidx > minidx) {
redact_block_phys_t rbp = { 0 };
uint64_t mididx = minidx + ((maxidx - minidx) / 2);
err = dmu_read(mos, rl->rl_object, mididx * sizeof (rbp),
sizeof (rbp), &rbp, DMU_READ_NO_PREFETCH);
if (err != 0)
break;

int cmp0 = redact_block_zb_compare(&buf[0], resume);
int cmpn = redact_block_zb_compare(
&buf[last_entry(rl, bufsize, maxbufid)], resume);
int cmp = redact_block_zb_compare(&rbp, resume);

/*
* If the first block is before or equal to the resume point,
* and the last one is equal or after, then the resume point is
* in this buf, and we should start here.
*/
if (cmp0 <= 0 && cmpn >= 0)
if (cmp == 0) {
minidx = mididx;
break;

if (cmp0 > 0)
maxbufid = midbufid - 1;
else if (cmpn < 0)
minbufid = midbufid + 1;
else
panic("No progress in binary search for resume point");
} else if (cmp > 0) {
maxidx =
(mididx == minidx ? minidx : mididx - 1);
} else {
minidx = mididx + 1;
}
}
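A minimal, self-contained sketch of the same narrowing rule over a toy sorted array, illustrating why the entry-level search always makes progress and converges on the resume index. This is an editor's illustration, not code from the patch; toy_compare and the integer keys stand in for redact_block_zb_compare() and redact_block_phys_t.

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for redact_block_zb_compare(): returns <0, 0, or >0. */
static int
toy_compare(uint64_t entry, uint64_t resume)
{
	return (entry < resume ? -1 : (entry > resume ? 1 : 0));
}

int
main(void)
{
	uint64_t entries[] = { 10, 20, 30, 40, 50 };	/* sorted, like the list */
	uint64_t resume = 40;
	uint64_t minidx = 0;
	uint64_t maxidx = 4;	/* num_entries - 1 */

	while (maxidx > minidx) {
		uint64_t mididx = minidx + ((maxidx - minidx) / 2);
		int cmp = toy_compare(entries[mididx], resume);
		if (cmp == 0) {
			minidx = mididx;
			break;
		} else if (cmp > 0) {
			/* Mid entry is past the resume point; shrink from above. */
			maxidx = (mididx == minidx ? minidx : mididx - 1);
		} else {
			/* Mid entry is before the resume point; shrink from below. */
			minidx = mididx + 1;
		}
	}
	/* Prints index 3, the entry matching the resume point. */
	printf("resume traversal at index %llu\n", (unsigned long long)minidx);
	return (0);
}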

for (uint64_t curidx = minbufid * redact_block_buf_num_entries(bufsize);
unsigned int bufsize = SPA_OLD_MAXBLOCKSIZE;
redact_block_phys_t *buf = zio_data_buf_alloc(bufsize);

unsigned int entries_per_buf = bufsize / sizeof (redact_block_phys_t);
uint64_t start_block = minidx / entries_per_buf;
err = dmu_read(mos, rl->rl_object, start_block * bufsize, bufsize, buf,
DMU_READ_PREFETCH);

for (uint64_t curidx = minidx;
err == 0 && curidx < rl->rl_phys->rlp_num_entries;
curidx++) {
/*
@@ -1696,45 +1672,38 @@ dsl_redaction_list_traverse(redaction_list_t *rl, zbookmark_phys_t *resume,
* prefetching, and this code shouldn't be the bottleneck, so we
* don't need to do manual prefetching.
*/
if (curidx % redact_block_buf_num_entries(bufsize) == 0) {
if (curidx % entries_per_buf == 0) {
err = dmu_read(mos, rl->rl_object, curidx *
sizeof (*buf), bufsize, buf,
DMU_READ_PREFETCH);
if (err != 0)
break;
}
redact_block_phys_t *rb = &buf[curidx %
redact_block_buf_num_entries(bufsize)];
/*
* If resume is non-null, we should either not send the data, or
* null out resume so we don't have to keep doing these
* comparisons.
*/
redact_block_phys_t *rb = &buf[curidx % entries_per_buf];
if (resume != NULL) {
if (redact_block_zb_compare(rb, resume) < 0) {
continue;
} else {
/*
* If the place to resume is in the middle of
* the range described by this
* redact_block_phys, then modify the
* redact_block_phys in memory so we generate
* the right records.
*/
if (resume->zb_object == rb->rbp_object &&
resume->zb_blkid > rb->rbp_blkid) {
uint64_t diff = resume->zb_blkid -
rb->rbp_blkid;
rb->rbp_blkid = resume->zb_blkid;
redact_block_set_count(rb,
redact_block_get_count(rb) - diff);
}
resume = NULL;
ASSERT3S(redact_block_zb_compare(rb, resume), >=, 0);
/*
* If the place to resume is in the middle of
* the range described by this
* redact_block_phys, then modify the
* redact_block_phys in memory so we generate
* the right records.
*/
if (resume->zb_object == rb->rbp_object &&
resume->zb_blkid > rb->rbp_blkid) {
uint64_t diff = resume->zb_blkid -
rb->rbp_blkid;
rb->rbp_blkid = resume->zb_blkid;
redact_block_set_count(rb,
redact_block_get_count(rb) - diff);
}
resume = NULL;
}

if (cb(rb, arg) != 0)
if (cb(rb, arg) != 0) {
err = EINTR;
break;
}
}

zio_data_buf_free(buf, bufsize);
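A worked example of the in-memory adjustment made when the resume point falls in the middle of a redact_block_phys_t range (the object, block, and count values below are invented for illustration):

/*
 * Illustration only: suppose rb describes object 7, rbp_blkid = 100, with a
 * count of 50 blocks (blkids 100..149), and resume points at object 7,
 * zb_blkid = 120.  Then diff = 120 - 100 = 20, rbp_blkid becomes 120, and
 * the count becomes 50 - 20 = 30, so the callback only sees the portion of
 * the range at or after the resume point (blkids 120..149).
 */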