diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
index 1676020d04d3..89ee30014c6c 100644
--- a/include/sys/fs/zfs.h
+++ b/include/sys/fs/zfs.h
@@ -378,6 +378,7 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
+ VDEV_PROP_QUEUE_IO,
VDEV_NUM_PROPS
} vdev_prop_t;
diff --git a/include/sys/vdev_impl.h b/include/sys/vdev_impl.h
index abd66b8abc96..dbb98680e1bb 100644
--- a/include/sys/vdev_impl.h
+++ b/include/sys/vdev_impl.h
@@ -467,6 +467,7 @@ struct vdev {
uint64_t vdev_io_t;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
+ uint64_t vdev_queue_io;
};
#define VDEV_PAD_SIZE (8 << 10)
diff --git a/include/sys/zio.h b/include/sys/zio.h
index 46f5d68aed4a..6fe01b5a555e 100644
--- a/include/sys/zio.h
+++ b/include/sys/zio.h
@@ -209,24 +209,25 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_TRYHARD (1ULL << 17)
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
#define ZIO_FLAG_DIO_READ (1ULL << 19)
+#define ZIO_FLAG_DIO_WRITE (1ULL << 20)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
-#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
-#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
-#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
-#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
-#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
-#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
-#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
-#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
-#define ZIO_FLAG_GODFATHER (1ULL << 28)
-#define ZIO_FLAG_NOPWRITE (1ULL << 29)
-#define ZIO_FLAG_REEXECUTED (1ULL << 30)
-#define ZIO_FLAG_DELEGATED (1ULL << 31)
-#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
+#define ZIO_FLAG_DONT_QUEUE (1ULL << 21) /* must be first for INHERIT */
+#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 22)
+#define ZIO_FLAG_IO_BYPASS (1ULL << 23)
+#define ZIO_FLAG_IO_REWRITE (1ULL << 24)
+#define ZIO_FLAG_RAW_COMPRESS (1ULL << 25)
+#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 26)
+#define ZIO_FLAG_GANG_CHILD (1ULL << 27)
+#define ZIO_FLAG_DDT_CHILD (1ULL << 28)
+#define ZIO_FLAG_GODFATHER (1ULL << 29)
+#define ZIO_FLAG_NOPWRITE (1ULL << 30)
+#define ZIO_FLAG_REEXECUTED (1ULL << 31)
+#define ZIO_FLAG_DELEGATED (1ULL << 32)
+#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 33)
#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
index ac9ae233c72d..5317417f7f57 100644
--- a/lib/libzfs/libzfs.abi
+++ b/lib/libzfs/libzfs.abi
@@ -5916,7 +5916,8 @@
-
+
+
diff --git a/man/man7/vdevprops.7 b/man/man7/vdevprops.7
index 34d4026b1009..7595c5f2aa19 100644
--- a/man/man7/vdevprops.7
+++ b/man/man7/vdevprops.7
@@ -156,6 +156,9 @@ If this device should perform new allocations, used to disable a device
when it is scheduled for later removal.
See
.Xr zpool-remove 8 .
+.It Sy queue_io
+Add io to the vdev queue when reading or writing to this vdev.
+Disabling this property can sometimes improve performance for direct IOs.
.El
.Ss User Properties
In addition to the standard native properties, ZFS supports arbitrary user
diff --git a/module/zcommon/zfs_valstr.c b/module/zcommon/zfs_valstr.c
index 43bccea14a85..ba5ed5336e42 100644
--- a/module/zcommon/zfs_valstr.c
+++ b/module/zcommon/zfs_valstr.c
@@ -207,6 +207,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
{ '.', "RD", "DIO_READ" },
+ { '.', "WD", "DIO_WRITE" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
diff --git a/module/zcommon/zpool_prop.c b/module/zcommon/zpool_prop.c
index d3355730ba3d..c8831753dff9 100644
--- a/module/zcommon/zpool_prop.c
+++ b/module/zcommon/zpool_prop.c
@@ -466,6 +466,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
boolean_table, sfeatures);
+ zprop_register_index(VDEV_PROP_QUEUE_IO, "queue_io", 1,
+ PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "QUEUE_IO",
+ boolean_table, sfeatures);
/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
diff --git a/module/zfs/dmu_direct.c b/module/zfs/dmu_direct.c
index 40b78b519f49..a12b0422de43 100644
--- a/module/zfs/dmu_direct.c
+++ b/module/zfs/dmu_direct.c
@@ -195,7 +195,8 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
zio_t *zio = zio_write(pio, os->os_spa, txg, bp, data,
db->db.db_size, db->db.db_size, &zp,
dmu_write_direct_ready, NULL, dmu_write_direct_done, dsa,
- ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb);
+ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_WRITE,
+ &zb);
if (pio == NULL)
return (zio_wait(zio));
diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
index 983f444d79b0..f648babf7ae7 100644
--- a/module/zfs/vdev.c
+++ b/module/zfs/vdev.c
@@ -704,6 +704,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);
+ vd->vdev_queue_io = vdev_prop_default_numeric(VDEV_PROP_QUEUE_IO);
+
list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
list_link_init(&vd->vdev_initialize_node);
@@ -6053,6 +6055,15 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_slow_io_t = intval;
break;
+ case VDEV_PROP_QUEUE_IO:
+ if (nvpair_value_uint64(elem, &intval) != 0) {
+ error = EINVAL;
+ break;
+ }
+ if (vd->vdev_ops->vdev_op_leaf) {
+ vd->vdev_queue_io = intval;
+ }
+ break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@@ -6416,6 +6427,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
+ case VDEV_PROP_QUEUE_IO:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
diff --git a/module/zfs/vdev_queue.c b/module/zfs/vdev_queue.c
index 092b3f375be0..d2d29106fa26 100644
--- a/module/zfs/vdev_queue.c
+++ b/module/zfs/vdev_queue.c
@@ -946,6 +946,12 @@ vdev_queue_io(zio_t *zio)
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();
+ if (!zio->io_vd->vdev_queue_io &&
+ zio->io_flags & (ZIO_FLAG_DIO_READ | ZIO_FLAG_DIO_WRITE)) {
+ zio->io_queue_state = ZIO_QS_NONE;
+ return (zio);
+ }
+
mutex_enter(&vq->vq_lock);
vdev_queue_io_add(vq, zio);
nio = vdev_queue_io_to_issue(vq);
@@ -978,6 +984,10 @@ vdev_queue_io_done(zio_t *zio)
vq->vq_io_complete_ts = now;
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;
+ if (zio->io_queue_state == ZIO_QS_NONE) {
+ return;
+ }
+
mutex_enter(&vq->vq_lock);
vdev_queue_pending_remove(vq, zio);
diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
index fc4adc42d00a..3460b30fd4e9 100644
--- a/tests/runfiles/common.run
+++ b/tests/runfiles/common.run
@@ -544,7 +544,7 @@ tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
'zpool_set_ashift', 'zpool_set_features', 'vdev_set_001_pos',
- 'user_property_001_pos', 'user_property_002_neg']
+ 'user_property_001_pos', 'user_property_002_neg', 'vdev_set_queue_io']
tags = ['functional', 'cli_root', 'zpool_set']
[tests/functional/cli_root/zpool_split]
diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am
index 7d1551a63f0d..56cceb179179 100644
--- a/tests/zfs-tests/tests/Makefile.am
+++ b/tests/zfs-tests/tests/Makefile.am
@@ -1233,6 +1233,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_set/setup.ksh \
functional/cli_root/zpool/setup.ksh \
functional/cli_root/zpool_set/vdev_set_001_pos.ksh \
+ functional/cli_root/zpool_set/vdev_set_queue_io.ksh \
functional/cli_root/zpool_set/zpool_set_common.kshlib \
functional/cli_root/zpool_set/zpool_set_001_pos.ksh \
functional/cli_root/zpool_set/zpool_set_002_neg.ksh \
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
index 6cfa7eaf7514..0650cebb6b4a 100644
--- a/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_get/vdev_get.cfg
@@ -75,4 +75,5 @@ typeset -a properties=(
trim_support
trim_errors
slow_ios
+ queue_io
)
diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_set/vdev_set_queue_io.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_set/vdev_set_queue_io.ksh
new file mode 100755
index 000000000000..6f7d967fce13
--- /dev/null
+++ b/tests/zfs-tests/tests/functional/cli_root/zpool_set/vdev_set_queue_io.ksh
@@ -0,0 +1,95 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or https://opensource.org/licenses/CDDL-1.0.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2024 by Triad National Security, LLC.
+#
+
+. $STF_SUITE/include/libtest.shlib
+
+#
+# DESCRIPTION:
+# Toggling vdev queue_io property while reading from vdev should not cause panic.
+#
+# STRATEGY:
+# 1. Create a zpool
+# 2. Write a file to the pool.
+# 3. Start reading from file, while also toggling the queue_io property on / off.
+#
+
+verify_runnable "global"
+
+command -v fio > /dev/null || log_unsupported "fio missing"
+log_must save_tunable DIO_ENABLED
+log_must set_tunable32 DIO_ENABLED 1
+
+function toggle_queue_io
+{
+ zpool set queue_io=off $TESTPOOL1 $FILEDEV
+ sleep 0.1
+ zpool set queue_io=on $TESTPOOL1 $FILEDEV
+ sleep 0.1
+}
+
+function cleanup
+{
+ log_must destroy_pool $TESTPOOL1
+ rm -f $FILEDEV
+ log_must restore_tunable DIO_ENABLED
+}
+
+log_assert "Toggling vdev queue_io property while reading from vdev should not cause panic"
+log_onexit cleanup
+
+# 1. Create a pool
+
+FILEDEV="$TEST_BASE_DIR/filedev.$$"
+log_must truncate -s $(($MINVDEVSIZE * 2)) $FILEDEV
+log_must create_pool $TESTPOOL1 $FILEDEV
+
+mntpnt=$(get_prop mountpoint $TESTPOOL1)
+
+# 2. Write a file to the pool, while also toggling the queue_io property on / off.
+
+log_must eval "fio --filename=$mntpnt/foobar --name=write-file \
+ --rw=write --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
+ --ioengine=sync --runtime=10 &"
+
+ITERATIONS=30
+
+for i in $(seq $ITERATIONS); do
+ log_must toggle_queue_io
+done;
+wait
+
+# 3. Starting reading from file, while also toggling the queue_io property on / off.
+
+log_must eval "fio --filename=$mntpnt/foobar --name=read-file \
+ --rw=read --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
+ --ioengine=sync --time_based --runtime=10 &"
+
+for i in $(seq $ITERATIONS); do
+ log_must toggle_queue_io
+done;
+wait
+
+log_pass "Toggling vdev queue_io property while reading from vdev does not cause panic"