Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ZIL: Improve next log block size prediction. #14909

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions include/os/linux/zfs/sys/trace_zil.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@
__field(uint64_t, zl_parse_lr_seq) \
__field(uint64_t, zl_parse_blk_count) \
__field(uint64_t, zl_parse_lr_count) \
__field(uint64_t, zl_cur_used) \
__field(uint64_t, zl_cur_size) \
__field(uint64_t, zl_cur_left) \
__field(uint64_t, zl_cur_max) \
__field(clock_t, zl_replay_time) \
__field(uint64_t, zl_replay_blks)

Expand All @@ -72,7 +74,9 @@
__entry->zl_parse_lr_seq = zilog->zl_parse_lr_seq; \
__entry->zl_parse_blk_count = zilog->zl_parse_blk_count;\
__entry->zl_parse_lr_count = zilog->zl_parse_lr_count; \
__entry->zl_cur_used = zilog->zl_cur_used; \
__entry->zl_cur_size = zilog->zl_cur_size; \
__entry->zl_cur_left = zilog->zl_cur_left; \
__entry->zl_cur_max = zilog->zl_cur_max; \
__entry->zl_replay_time = zilog->zl_replay_time; \
__entry->zl_replay_blks = zilog->zl_replay_blks;

Expand All @@ -82,7 +86,8 @@
"replay %u stop_sync %u logbias %u sync %u " \
"parse_error %u parse_blk_seq %llu parse_lr_seq %llu " \
"parse_blk_count %llu parse_lr_count %llu " \
"cur_used %llu replay_time %lu replay_blks %llu }"
"cur_size %u cur_left %llu cur_max %llu replay_time %lu " \
"replay_blks %llu }"

#define ZILOG_TP_PRINTK_ARGS \
__entry->zl_lr_seq, __entry->zl_commit_lr_seq, \
Expand All @@ -92,7 +97,8 @@
__entry->zl_stop_sync, __entry->zl_logbias, __entry->zl_sync, \
__entry->zl_parse_error, __entry->zl_parse_blk_seq, \
__entry->zl_parse_lr_seq, __entry->zl_parse_blk_count, \
__entry->zl_parse_lr_count, __entry->zl_cur_used, \
__entry->zl_parse_lr_count, __entry->zl_cur_size, \
__entry->zl_cur_left, __entry->zl_cur_max, \
__entry->zl_replay_time, __entry->zl_replay_blks

#define ITX_TP_STRUCT_ENTRY \
Expand Down
12 changes: 8 additions & 4 deletions include/sys/zil_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ typedef struct zil_vdev_node {
avl_node_t zv_node; /* AVL tree linkage */
} zil_vdev_node_t;

#define ZIL_PREV_BLKS 16
#define ZIL_BURSTS 8

/*
* Stable storage intent log management structure. One per dataset.
Expand Down Expand Up @@ -202,14 +202,18 @@ struct zilog {
uint64_t zl_parse_lr_count; /* number of log records parsed */
itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */
list_t zl_itx_commit_list; /* itx list to be committed */
uint64_t zl_cur_used; /* current commit log size used */
uint64_t zl_cur_size; /* current burst full size */
uint64_t zl_cur_left; /* current burst remaining size */
uint64_t zl_cur_max; /* biggest record in current burst */
list_t zl_lwb_list; /* in-flight log write list */
avl_tree_t zl_bp_tree; /* track bps during log parse */
clock_t zl_replay_time; /* lbolt of when replay started */
uint64_t zl_replay_blks; /* number of log blocks replayed */
zil_header_t zl_old_header; /* debugging aid */
uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
uint_t zl_prev_rotor; /* rotor for zl_prev[] */
uint_t zl_parallel; /* workload is multi-threaded */
uint_t zl_prev_rotor; /* rotor for zl_prev_* */
uint_t zl_prev_opt[ZIL_BURSTS]; /* optimal block size */
uint_t zl_prev_min[ZIL_BURSTS]; /* minimal first block size */
txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
uint64_t zl_dirty_max_txg; /* highest txg used to dirty zilog */

Expand Down
2 changes: 1 addition & 1 deletion include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, zio_flag_t flags);

extern int zio_alloc_zil(spa_t *spa, objset_t *os, uint64_t txg,
blkptr_t *new_bp, uint64_t size, boolean_t *slog);
blkptr_t *new_bp, uint64_t size, uint64_t maxsize, boolean_t *slog);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);

Expand Down
16 changes: 7 additions & 9 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
.Dd January 10, 2023
.Dd May 26, 2023
.Dt ZFS 4
.Os
.
Expand Down Expand Up @@ -790,7 +790,7 @@ Note that this should not be set below the ZED thresholds
(currently 10 checksums over 10 seconds)
or else the daemon may not trigger any action.
.
.It Sy zfs_commit_timeout_pct Ns = Ns Sy 5 Ns % Pq uint
.It Sy zfs_commit_timeout_pct Ns = Ns Sy 10 Ns % Pq uint
This controls the amount of time that a ZIL block (lwb) will remain "open"
when it isn't "full", and it has a thread waiting for it to be committed to
stable storage.
Expand Down Expand Up @@ -2142,12 +2142,10 @@ On very fragmented pools, lowering this
.Pq typically to Sy 36 KiB
can improve performance.
.
.It Sy zil_min_commit_timeout Ns = Ns Sy 5000 Pq u64
This sets the minimum delay in nanoseconds that the ZIL will wait for more
records before committing a block.
If ZIL writes are too fast, the kernel may not be able to sleep for so short an
interval, increasing log latency above that allowed by
.Sy zfs_commit_timeout_pct .
.It Sy zil_maxcopied Ns = Ns Sy 8192 Ns B Po 8 KiB Pc Pq uint
This sets the maximum number of write bytes logged via WR_COPIED.
It tunes the tradeoff between an additional memory copy with possibly worse log
space efficiency on one side and an additional range lock/unlock on the other.
.
.It Sy zil_nocacheflush Ns = Ns Sy 0 Ns | Ns 1 Pq int
Disable the cache flush commands that are normally sent to disk by
Expand All @@ -2159,7 +2157,7 @@ if a volatile out-of-order write cache is enabled.
Disable intent logging replay.
Can be disabled for recovery from corrupted ZIL.
.
.It Sy zil_slog_bulk Ns = Ns Sy 786432 Ns B Po 768 KiB Pc Pq u64
.It Sy zil_slog_bulk Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
Limit SLOG write size per commit executed with synchronous priority.
Any writes above that will be executed with lower (asynchronous) priority
to limit potential SLOG device abuse by a single active ZIL writer.
Expand Down
Loading