Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Module Parameter Regarding Log Size Limit #12284

Merged
merged 2 commits into from
Jul 20, 2021
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add Module Parameters Regarding Log Size Limit
zfs_wrlog_data_max
The upper limit of TX_WRITE log data. Once it is exceeded,
write operations are blocked until the log data is cleared out
after txg sync. Only TX_WRITE log records with WR_COPIED
or WR_NEED_COPY are counted.

Add write-transaction log data counter at end of the body of
zfs_log_write() and zvol_log_write().

Add delay logic into dmu_tx_try_assign().

Signed-off-by: jxdking <lostking2008@hotmail.com>
jxdking committed Jul 4, 2021
commit e7e717b9b726c4540b795f6b5b02695e2d294e3b
7 changes: 7 additions & 0 deletions include/sys/dsl_pool.h
Original file line number Diff line number Diff line change
@@ -40,6 +40,7 @@
#include <sys/rrwlock.h>
#include <sys/dsl_synctask.h>
#include <sys/mmp.h>
#include <sys/aggsum.h>

#ifdef __cplusplus
extern "C" {
@@ -58,6 +59,7 @@ struct dsl_deadlist;

extern unsigned long zfs_dirty_data_max;
extern unsigned long zfs_dirty_data_max_max;
extern unsigned long zfs_wrlog_data_max;
extern int zfs_dirty_data_sync_percent;
extern int zfs_dirty_data_max_percent;
extern int zfs_dirty_data_max_max_percent;
@@ -119,6 +121,9 @@ typedef struct dsl_pool {
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;

aggsum_t dp_wrlog_pertxg[TXG_SIZE];
aggsum_t dp_wrlog_total;

/*
* Time of most recently scheduled (furthest in the future)
* wakeup for delayed transactions.
@@ -158,6 +163,8 @@ int dsl_pool_sync_context(dsl_pool_t *dp);
uint64_t dsl_pool_adjustedsize(dsl_pool_t *dp, zfs_space_check_t slop_policy);
uint64_t dsl_pool_unreserved_space(dsl_pool_t *dp,
zfs_space_check_t slop_policy);
void dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg);
boolean_t dsl_pool_wrlog_over_max(dsl_pool_t *dp);
void dsl_pool_dirty_space(dsl_pool_t *dp, int64_t space, dmu_tx_t *tx);
void dsl_pool_undirty_space(dsl_pool_t *dp, int64_t space, uint64_t txg);
void dsl_free(dsl_pool_t *dp, uint64_t txg, const blkptr_t *bpp);
12 changes: 12 additions & 0 deletions module/zfs/arc.c
Original file line number Diff line number Diff line change
@@ -7969,6 +7969,18 @@ arc_init(void)
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
}

if (zfs_wrlog_data_max == 0) {

/*
* dp_wrlog_total is reduced for each txg at the end of
* spa_sync(). However, dp_dirty_total is reduced every time
* a block is written out. Thus under normal operation,
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
* dp_wrlog_total could grow 2 times as big as
* zfs_dirty_data_max.
*/
zfs_wrlog_data_max = zfs_dirty_data_max * 2;
}
}

void
5 changes: 5 additions & 0 deletions module/zfs/dmu_tx.c
Original file line number Diff line number Diff line change
@@ -884,6 +884,11 @@ dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
return (SET_ERROR(ERESTART));
}

if (!tx->tx_dirty_delayed &&
dsl_pool_wrlog_over_max(tx->tx_pool)) {
behlendorf marked this conversation as resolved.
Show resolved Hide resolved
return (SET_ERROR(ERESTART));
}

if (!tx->tx_dirty_delayed &&
dsl_pool_need_dirty_delay(tx->tx_pool)) {
tx->tx_wait_dirty = B_TRUE;
55 changes: 55 additions & 0 deletions module/zfs/dsl_pool.c
Original file line number Diff line number Diff line change
@@ -104,6 +104,14 @@ unsigned long zfs_dirty_data_max_max = 0;
int zfs_dirty_data_max_percent = 10;
int zfs_dirty_data_max_max_percent = 25;

/*
 * zfs_wrlog_data_max is the upper limit, in bytes, on TX_WRITE log data.
 * While the pool-wide total is above this limit, dmu_tx_try_assign()
 * returns ERESTART for transactions that are not already dirty-delayed,
 * so write operations are blocked until the log data accounted to a txg
 * is cleared out after that txg has synced.
 * Only TX_WRITE log records with WR_COPIED or WR_NEED_COPY are counted.
 *
 * If this is still 0 when arc_init() runs, it is set to
 * zfs_dirty_data_max * 2.
 */
unsigned long zfs_wrlog_data_max = 0;

/*
* If there's at least this much dirty data (as a percentage of
* zfs_dirty_data_max), push out a txg. This should be less than
@@ -220,6 +228,11 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);

aggsum_init(&dp->dp_wrlog_total, 0);
for (int i = 0; i < TXG_SIZE; i++) {
aggsum_init(&dp->dp_wrlog_pertxg[i], 0);
}

dp->dp_zrele_taskq = taskq_create("z_zrele", 100, defclsyspri,
boot_ncpus * 8, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC |
TASKQ_THREADS_CPU_PCT);
@@ -416,6 +429,12 @@ dsl_pool_close(dsl_pool_t *dp)
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
cv_destroy(&dp->dp_spaceavail_cv);

aggsum_fini(&dp->dp_wrlog_total);
for (int i = 0; i < TXG_SIZE; i++) {
aggsum_fini(&dp->dp_wrlog_pertxg[i]);
}
behlendorf marked this conversation as resolved.
Show resolved Hide resolved

taskq_destroy(dp->dp_unlinked_drain_taskq);
taskq_destroy(dp->dp_zrele_taskq);
if (dp->dp_blkstats != NULL) {
@@ -592,6 +611,36 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
cv_signal(&dp->dp_spaceavail_cv);
}

/*
 * Account `size` bytes of write log data against `txg`.
 *
 * The amount is added to both the per-txg counter and the pool-wide
 * total. If the per-txg counter then exceeds a threshold derived from
 * zfs_dirty_data_max, txg_kick() is called for that txg.
 */
void
dsl_pool_wrlog_count(dsl_pool_t *dp, int64_t size, uint64_t txg)
{
	const uint64_t slot = txg & TXG_MASK;

	ASSERT3S(size, >=, 0);

	aggsum_add(&dp->dp_wrlog_pertxg[slot], size);
	aggsum_add(&dp->dp_wrlog_total, size);

	/*
	 * The kick threshold sits 10 percentage points above
	 * zfs_dirty_data_sync_percent, i.e. slightly bigger than the
	 * min dirty sync bytes.
	 */
	const int kick_percent = zfs_dirty_data_sync_percent + 10;
	const uint64_t kick_bytes = zfs_dirty_data_max * kick_percent / 100;

	if (aggsum_compare(&dp->dp_wrlog_pertxg[slot], kick_bytes) <= 0)
		return;

	txg_kick(dp, txg);
}

/*
 * Report whether the pool-wide write log total is strictly greater
 * than zfs_wrlog_data_max.
 */
boolean_t
dsl_pool_wrlog_over_max(dsl_pool_t *dp)
{
	int cmp = aggsum_compare(&dp->dp_wrlog_total, zfs_wrlog_data_max);

	return (cmp > 0);
}

/*
 * Drop the write log data accounted to `txg`: read the current value of
 * that txg's counter, then subtract it from both the per-txg counter
 * and the pool-wide total.
 */
static void
dsl_pool_wrlog_clear(dsl_pool_t *dp, uint64_t txg)
{
	const uint64_t slot = txg & TXG_MASK;
	const int64_t drained =
	    -(int64_t)aggsum_value(&dp->dp_wrlog_pertxg[slot]);

	aggsum_add(&dp->dp_wrlog_pertxg[slot], drained);
	aggsum_add(&dp->dp_wrlog_total, drained);
}

#ifdef ZFS_DEBUG
static boolean_t
dsl_early_sync_task_verify(dsl_pool_t *dp, uint64_t txg)
@@ -816,6 +865,9 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}

dsl_pool_wrlog_clear(dp, txg);

ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}

@@ -1405,6 +1457,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, delay_min_dirty_percent, INT, ZMOD_RW,
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max, ULONG, ZMOD_RW,
"Determines the dirty space limit");

/*
 * If zfs_wrlog_data_max is still 0 when arc_init() runs, it is set to
 * 2 * zfs_dirty_data_max there.
 */
ZFS_MODULE_PARAM(zfs, zfs_, wrlog_data_max, ULONG, ZMOD_RW,
"The size limit of write-transaction zil log data");
behlendorf marked this conversation as resolved.
Show resolved Hide resolved

/* zfs_dirty_data_max_max only applied at module load in arc_init(). */
ZFS_MODULE_PARAM(zfs, zfs_, dirty_data_max_max, ULONG, ZMOD_RD,
"zfs_dirty_data_max upper bound in bytes");
5 changes: 5 additions & 0 deletions module/zfs/zfs_log.c
Original file line number Diff line number Diff line change
@@ -541,6 +541,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
ssize_t size = resid;

if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
@@ -626,6 +627,10 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
off += len;
resid -= len;
}

if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
dsl_pool_wrlog_count(zilog->zl_dmu_pool, size, tx->tx_txg);
}
}

/*
7 changes: 5 additions & 2 deletions module/zfs/zvol.c
Original file line number Diff line number Diff line change
@@ -84,10 +84,8 @@
#include <sys/zfs_rlock.h>
#include <sys/spa_impl.h>
#include <sys/zvol.h>

#include <sys/zvol_impl.h>


unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;

@@ -579,6 +577,7 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
itx_wr_state_t write_state;
uint64_t sz = size;

if (zil_replaying(zilog, tx))
return;
@@ -630,6 +629,10 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
offset += len;
size -= len;
}

if (write_state == WR_COPIED || write_state == WR_NEED_COPY) {
dsl_pool_wrlog_count(zilog->zl_dmu_pool, sz, tx->tx_txg);
}
}

/*