Skip to content

Commit

Permalink
Improve ZVOL queue behavior.
Browse files Browse the repository at this point in the history
The Linux block device queue subsystem exposes a number of configurable
settings described in Linux block/blk-settings.c. The defaults for these
settings are tuned for hard drives, and are not optimized for ZVOLs. Proper
configuration of these options would allow upper layers (I/O scheduler) to
make better decisions about write merging and ordering.

Detailed rationale:

 - max_hw_sectors is set to unlimited (UINT_MAX). zvol_write() is able to
   handle writes of any size, so there's no reason to impose a limit. Let the
   upper layer decide.

 - max_segments and max_segment_size are set to unlimited. zvol_write() will
   copy the requests' contents into a dbuf anyway, so the number and size of
   the segments are irrelevant. Let the upper layer decide.

 - physical_block_size and io_opt are set to the ZVOL's block size. This
   has the potential to somewhat alleviate issue #361 for ZVOLs, by warning
   the upper layers that writes smaller than the volume's block size will be
   slow.

 - The NONROT flag is set to indicate this isn't a rotational device.
   Although the backing zpool might be composed of rotational devices, the
   resulting ZVOL often doesn't exhibit the same behavior due to the COW
   mechanisms used by ZFS. Setting this flag will prevent upper layers from
   making useless decisions (such as reordering writes) based on incorrect
   assumptions about the behavior of the ZVOL.

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
  • Loading branch information
dechamps authored and behlendorf committed Feb 8, 2012
1 parent b18019d commit 34037af
Show file tree
Hide file tree
Showing 64 changed files with 3,023 additions and 1,864 deletions.
5 changes: 5 additions & 0 deletions Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/mount_zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/sas_switch_id/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zdb/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zfs/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zinject/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zpios/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zpool/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zpool_id/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zpool_layout/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/ztest/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
5 changes: 5 additions & 0 deletions cmd/zvol_id/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ am__aclocal_m4_deps = \
$(top_srcdir)/config/kernel-blk-end-request.m4 \
$(top_srcdir)/config/kernel-blk-fetch-request.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-io-opt.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
$(top_srcdir)/config/kernel-blk-queue-nonrot.m4 \
$(top_srcdir)/config/kernel-blk-queue-physical-block-size.m4 \
$(top_srcdir)/config/kernel-blk-requeue-request.m4 \
$(top_srcdir)/config/kernel-blk-rq-bytes.m4 \
$(top_srcdir)/config/kernel-blk-rq-pos.m4 \
Expand Down
24 changes: 24 additions & 0 deletions config/kernel-blk-queue-io-opt.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
dnl #
dnl # 2.6.30 API change
dnl # The blk_queue_io_opt() function was added to indicate the optimal
dnl # I/O size for the device.
dnl #
dnl # This check defines HAVE_BLK_QUEUE_IO_OPT when the test snippet below,
dnl # which includes <linux/blkdev.h> and calls blk_queue_io_opt() with an
dnl # unsigned int argument, passes ZFS_LINUX_TRY_COMPILE.  Nothing is
dnl # defined when the test fails.
dnl #
dnl # EXTRA_KCFLAGS is saved in tmp_flags, replaced (not appended to) with
dnl # -Wno-unused-but-set-variable for the duration of the test, and then
dnl # restored to its saved value.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT], [
AC_MSG_CHECKING([whether blk_queue_io_opt() is available])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct request_queue *q = NULL;
unsigned int opt = 1;
(void) blk_queue_io_opt(q, opt);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_QUEUE_IO_OPT, 1,
[blk_queue_io_opt() is available])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
22 changes: 22 additions & 0 deletions config/kernel-blk-queue-max-hw-sectors.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
dnl #
dnl # 2.6.34 API change
dnl # blk_queue_max_hw_sectors() replaces blk_queue_max_sectors().
dnl #
dnl # This check defines HAVE_BLK_QUEUE_MAX_HW_SECTORS when the test snippet
dnl # below, which includes <linux/blkdev.h> and calls
dnl # blk_queue_max_hw_sectors() with BLK_SAFE_MAX_SECTORS, passes
dnl # ZFS_LINUX_TRY_COMPILE.  Nothing is defined when the test fails.
dnl #
dnl # EXTRA_KCFLAGS is saved in tmp_flags, replaced (not appended to) with
dnl # -Wno-unused-but-set-variable for the duration of the test, and then
dnl # restored to its saved value.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS], [
AC_MSG_CHECKING([whether blk_queue_max_hw_sectors() is available])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct request_queue *q = NULL;
(void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_QUEUE_MAX_HW_SECTORS, 1,
[blk_queue_max_hw_sectors() is available])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
23 changes: 23 additions & 0 deletions config/kernel-blk-queue-max-segments.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
dnl #
dnl # 2.6.34 API change
dnl # blk_queue_max_segments() consolidates blk_queue_max_hw_segments()
dnl # and blk_queue_max_phys_segments().
dnl #
dnl # This check defines HAVE_BLK_QUEUE_MAX_SEGMENTS when the test snippet
dnl # below, which includes <linux/blkdev.h> and calls
dnl # blk_queue_max_segments() with BLK_MAX_SEGMENTS, passes
dnl # ZFS_LINUX_TRY_COMPILE.  Nothing is defined when the test fails.
dnl #
dnl # EXTRA_KCFLAGS is saved in tmp_flags, replaced (not appended to) with
dnl # -Wno-unused-but-set-variable for the duration of the test, and then
dnl # restored to its saved value.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
AC_MSG_CHECKING([whether blk_queue_max_segments() is available])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct request_queue *q = NULL;
(void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_QUEUE_MAX_SEGMENTS, 1,
[blk_queue_max_segments() is available])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
25 changes: 25 additions & 0 deletions config/kernel-blk-queue-nonrot.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
dnl #
dnl # 2.6.27 API change
dnl # The blk_queue_nonrot() function and QUEUE_FLAG_NONROT flag were
dnl # added so non-rotational devices could be identified. These devices
dnl # have no seek time, which the higher level elevator uses to optimize
dnl # how the I/O is issued to the device.
dnl #
dnl # This check defines HAVE_BLK_QUEUE_NONROT when the test snippet below,
dnl # which includes <linux/blkdev.h> and calls blk_queue_nonrot(), passes
dnl # ZFS_LINUX_TRY_COMPILE.  Nothing is defined when the test fails.  Note
dnl # that the snippet only references blk_queue_nonrot(); it does not
dnl # reference QUEUE_FLAG_NONROT directly.
dnl #
dnl # EXTRA_KCFLAGS is saved in tmp_flags, replaced (not appended to) with
dnl # -Wno-unused-but-set-variable for the duration of the test, and then
dnl # restored to its saved value.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_NONROT], [
AC_MSG_CHECKING([whether blk_queue_nonrot() is available])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct request_queue *q = NULL;
(void) blk_queue_nonrot(q);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_QUEUE_NONROT, 1,
[blk_queue_nonrot() is available])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
25 changes: 25 additions & 0 deletions config/kernel-blk-queue-physical-block-size.m4
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
dnl #
dnl # 2.6.30 API change
dnl # The blk_queue_physical_block_size() function was introduced to
dnl # indicate the smallest I/O the device can write without incurring
dnl # a read-modify-write penalty.
dnl #
dnl # This check defines HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE when the test
dnl # snippet below, which includes <linux/blkdev.h> and calls
dnl # blk_queue_physical_block_size() with an unsigned short argument,
dnl # passes ZFS_LINUX_TRY_COMPILE.  Nothing is defined when the test fails.
dnl #
dnl # EXTRA_KCFLAGS is saved in tmp_flags, replaced (not appended to) with
dnl # -Wno-unused-but-set-variable for the duration of the test, and then
dnl # restored to its saved value.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE], [
AC_MSG_CHECKING([whether blk_queue_physical_block_size() is available])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Wno-unused-but-set-variable"
ZFS_LINUX_TRY_COMPILE([
#include <linux/blkdev.h>
],[
struct request_queue *q = NULL;
unsigned short block_size = 1;
(void) blk_queue_physical_block_size(q, block_size);
],[
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_QUEUE_PHYSICAL_BLOCK_SIZE, 1,
[blk_queue_physical_block_size() is available])
],[
AC_MSG_RESULT(no)
])
EXTRA_KCFLAGS="$tmp_flags"
])
5 changes: 5 additions & 0 deletions config/kernel.m4
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_REQ_SYNC
ZFS_AC_KERNEL_BLK_END_REQUEST
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
ZFS_AC_KERNEL_BLK_QUEUE_PHYSICAL_BLOCK_SIZE
ZFS_AC_KERNEL_BLK_QUEUE_IO_OPT
ZFS_AC_KERNEL_BLK_QUEUE_NONROT
ZFS_AC_KERNEL_BLK_FETCH_REQUEST
ZFS_AC_KERNEL_BLK_REQUEUE_REQUEST
ZFS_AC_KERNEL_BLK_RQ_BYTES
Expand Down
Loading

0 comments on commit 34037af

Please sign in to comment.