Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add vdev property to bypass vdev queue #16591

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ typedef enum {
VDEV_PROP_TRIM_SUPPORT,
VDEV_PROP_TRIM_ERRORS,
VDEV_PROP_SLOW_IOS,
VDEV_PROP_QUEUE_IO,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,7 @@ struct vdev {
uint64_t vdev_io_t;
uint64_t vdev_slow_io_n;
uint64_t vdev_slow_io_t;
uint64_t vdev_queue_io;
};

#define VDEV_PAD_SIZE (8 << 10)
Expand Down
27 changes: 14 additions & 13 deletions include/sys/zio.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,24 +209,25 @@ typedef uint64_t zio_flag_t;
#define ZIO_FLAG_TRYHARD (1ULL << 17)
#define ZIO_FLAG_OPTIONAL (1ULL << 18)
#define ZIO_FLAG_DIO_READ (1ULL << 19)
#define ZIO_FLAG_DIO_WRITE (1ULL << 20)
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)

/*
* Flags not inherited by any children.
*/
#define ZIO_FLAG_DONT_QUEUE (1ULL << 20) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 21)
#define ZIO_FLAG_IO_BYPASS (1ULL << 22)
#define ZIO_FLAG_IO_REWRITE (1ULL << 23)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 24)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 25)
#define ZIO_FLAG_GANG_CHILD (1ULL << 26)
#define ZIO_FLAG_DDT_CHILD (1ULL << 27)
#define ZIO_FLAG_GODFATHER (1ULL << 28)
#define ZIO_FLAG_NOPWRITE (1ULL << 29)
#define ZIO_FLAG_REEXECUTED (1ULL << 30)
#define ZIO_FLAG_DELEGATED (1ULL << 31)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 32)
#define ZIO_FLAG_DONT_QUEUE (1ULL << 21) /* must be first for INHERIT */
#define ZIO_FLAG_DONT_PROPAGATE (1ULL << 22)
#define ZIO_FLAG_IO_BYPASS (1ULL << 23)
#define ZIO_FLAG_IO_REWRITE (1ULL << 24)
#define ZIO_FLAG_RAW_COMPRESS (1ULL << 25)
#define ZIO_FLAG_RAW_ENCRYPT (1ULL << 26)
#define ZIO_FLAG_GANG_CHILD (1ULL << 27)
#define ZIO_FLAG_DDT_CHILD (1ULL << 28)
#define ZIO_FLAG_GODFATHER (1ULL << 29)
#define ZIO_FLAG_NOPWRITE (1ULL << 30)
#define ZIO_FLAG_REEXECUTED (1ULL << 31)
#define ZIO_FLAG_DELEGATED (1ULL << 32)
#define ZIO_FLAG_DIO_CHKSUM_ERR (1ULL << 33)

#define ZIO_ALLOCATOR_NONE (-1)
#define ZIO_HAS_ALLOCATOR(zio) ((zio)->io_allocator != ZIO_ALLOCATOR_NONE)
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -5916,7 +5916,8 @@
<enumerator name='VDEV_PROP_TRIM_SUPPORT' value='49'/>
<enumerator name='VDEV_PROP_TRIM_ERRORS' value='50'/>
<enumerator name='VDEV_PROP_SLOW_IOS' value='51'/>
<enumerator name='VDEV_NUM_PROPS' value='52'/>
<enumerator name='VDEV_PROP_QUEUE_IO' value='52'/>
<enumerator name='VDEV_NUM_PROPS' value='53'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<class-decl name='zpool_load_policy' size-in-bits='256' is-struct='yes' visibility='default' id='2f65b36f'>
Expand Down
3 changes: 3 additions & 0 deletions man/man7/vdevprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ If this device should perform new allocations, used to disable a device
when it is scheduled for later removal.
See
.Xr zpool-remove 8 .
.It Sy queue_io
Add I/O to the vdev queue when reading from or writing to this vdev.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might want to give a hint to the user about when they would want to change this prop:

Add IO to the vdev queue when reading or writing to this vdev.
Disabling this property can sometimes improve performance for direct IOs.

Feel free to re-word this ⬆️

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

Disabling this property causes Direct I/O reads and writes to bypass the vdev queue, which can sometimes improve performance for Direct I/O.
.El
.Ss User Properties
In addition to the standard native properties, ZFS supports arbitrary user
Expand Down
1 change: 1 addition & 0 deletions module/zcommon/zfs_valstr.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ _VALSTR_BITFIELD_IMPL(zio_flag,
{ '.', "TH", "TRYHARD" },
{ '.', "OP", "OPTIONAL" },
{ '.', "RD", "DIO_READ" },
{ '.', "WD", "DIO_WRITE" },
{ '.', "DQ", "DONT_QUEUE" },
{ '.', "DP", "DONT_PROPAGATE" },
{ '.', "BY", "IO_BYPASS" },
Expand Down
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,9 @@ vdev_prop_init(void)
zprop_register_index(VDEV_PROP_TRIM_SUPPORT, "trim_support", 0,
PROP_READONLY, ZFS_TYPE_VDEV, "on | off", "TRIMSUP",
boolean_table, sfeatures);
zprop_register_index(VDEV_PROP_QUEUE_IO, "queue_io", 1,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "QUEUE_IO",
boolean_table, sfeatures);

/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
Expand Down
3 changes: 2 additions & 1 deletion module/zfs/dmu_direct.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ dmu_write_direct(zio_t *pio, dmu_buf_impl_t *db, abd_t *data, dmu_tx_t *tx)
zio_t *zio = zio_write(pio, os->os_spa, txg, bp, data,
db->db.db_size, db->db.db_size, &zp,
dmu_write_direct_ready, NULL, dmu_write_direct_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb);
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL | ZIO_FLAG_DIO_WRITE,
&zb);

if (pio == NULL)
return (zio_wait(zio));
Expand Down
12 changes: 12 additions & 0 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,8 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_slow_io_n = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_N);
vd->vdev_slow_io_t = vdev_prop_default_numeric(VDEV_PROP_SLOW_IO_T);

vd->vdev_queue_io = vdev_prop_default_numeric(VDEV_PROP_QUEUE_IO);

list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
list_link_init(&vd->vdev_initialize_node);
Expand Down Expand Up @@ -6053,6 +6055,15 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
}
vd->vdev_slow_io_t = intval;
break;
case VDEV_PROP_QUEUE_IO:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
if (vd->vdev_ops->vdev_op_leaf) {
vd->vdev_queue_io = intval;
}
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
Expand Down Expand Up @@ -6416,6 +6427,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_IO_T:
case VDEV_PROP_SLOW_IO_N:
case VDEV_PROP_SLOW_IO_T:
case VDEV_PROP_QUEUE_IO:
err = vdev_prop_get_int(vd, prop, &intval);
if (err && err != ENOENT)
break;
Expand Down
10 changes: 10 additions & 0 deletions module/zfs/vdev_queue.c
Original file line number Diff line number Diff line change
Expand Up @@ -946,6 +946,12 @@ vdev_queue_io(zio_t *zio)
zio->io_flags |= ZIO_FLAG_DONT_QUEUE;
zio->io_timestamp = gethrtime();

if (!zio->io_vd->vdev_queue_io &&
zio->io_flags & (ZIO_FLAG_DIO_READ | ZIO_FLAG_DIO_WRITE)) {
zio->io_queue_state = ZIO_QS_NONE;
return (zio);
}

mutex_enter(&vq->vq_lock);
vdev_queue_io_add(vq, zio);
nio = vdev_queue_io_to_issue(vq);
Expand Down Expand Up @@ -978,6 +984,10 @@ vdev_queue_io_done(zio_t *zio)
vq->vq_io_complete_ts = now;
vq->vq_io_delta_ts = zio->io_delta = now - zio->io_timestamp;

if (zio->io_queue_state == ZIO_QS_NONE) {
return;
}

mutex_enter(&vq->vq_lock);
vdev_queue_pending_remove(vq, zio);

Expand Down
2 changes: 1 addition & 1 deletion tests/runfiles/common.run
Original file line number Diff line number Diff line change
Expand Up @@ -544,7 +544,7 @@ tags = ['functional', 'cli_root', 'zpool_scrub']
[tests/functional/cli_root/zpool_set]
tests = ['zpool_set_001_pos', 'zpool_set_002_neg', 'zpool_set_003_neg',
'zpool_set_ashift', 'zpool_set_features', 'vdev_set_001_pos',
'user_property_001_pos', 'user_property_002_neg']
'user_property_001_pos', 'user_property_002_neg', 'vdev_set_queue_io']
tags = ['functional', 'cli_root', 'zpool_set']

[tests/functional/cli_root/zpool_split]
Expand Down
1 change: 1 addition & 0 deletions tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/cli_root/zpool_set/setup.ksh \
functional/cli_root/zpool/setup.ksh \
functional/cli_root/zpool_set/vdev_set_001_pos.ksh \
functional/cli_root/zpool_set/vdev_set_queue_io.ksh \
functional/cli_root/zpool_set/zpool_set_common.kshlib \
functional/cli_root/zpool_set/zpool_set_001_pos.ksh \
functional/cli_root/zpool_set/zpool_set_002_neg.ksh \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,5 @@ typeset -a properties=(
trim_support
trim_errors
slow_ios
queue_io
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2024 by Triad National Security, LLC.
#

. $STF_SUITE/include/libtest.shlib

#
# DESCRIPTION:
# Toggling the vdev queue_io property while writing to or reading from the
# vdev should not cause a panic.
#
# STRATEGY:
# 1. Create a zpool
# 2. Write a file to the pool, while also toggling the queue_io property on / off.
# 3. Read the file back, while also toggling the queue_io property on / off.
#

verify_runnable "global"

# fio generates the Direct I/O workload below; skip the test when it is
# not installed.
command -v fio > /dev/null || log_unsupported "fio missing"
# Remember the current DIO_ENABLED value (restored in cleanup) and force
# Direct I/O on so the fio --direct=1 jobs actually issue Direct I/O.
log_must save_tunable DIO_ENABLED
log_must set_tunable32 DIO_ENABLED 1

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you need to save/set/restore the zfs_dio_enabled module param to use Direct IO

log_must save_tunable DIO_ENABLED
log_must set_tunable32 DIO_ENABLED 1
...
log_must restore_tunable DIO_ENABLED

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

#
# Turn queue_io off and then back on for the test vdev, pausing briefly
# after each change (presumably so in-flight I/O gets to run under each
# setting).
#
# Returns non-zero as soon as either `zpool set` fails, so that a caller
# wrapping this function in log_must actually notices the failure; without
# the explicit returns the function's status would be that of the trailing
# sleep.
#
function toggle_queue_io
{
	zpool set queue_io=off "$TESTPOOL1" "$FILEDEV" || return 1
	sleep 0.1
	zpool set queue_io=on "$TESTPOOL1" "$FILEDEV" || return 1
	sleep 0.1
}

#
# Tear down everything the test set up: the pool, its backing file and the
# DIO_ENABLED tunable.
#
# The pool is only destroyed when it exists, so that a failure before or
# during pool creation cannot abort cleanup early and leave DIO_ENABLED
# changed for subsequent tests.
#
function cleanup
{
	poolexists $TESTPOOL1 && destroy_pool $TESTPOOL1
	# FILEDEV is unset if the test failed before the vdev file was created.
	[[ -n "$FILEDEV" ]] && rm -f "$FILEDEV"
	log_must restore_tunable DIO_ENABLED
}

log_assert "Toggling vdev queue_io property while reading from vdev should not cause panic"
# Register the cleanup function defined above as the test's exit handler.
log_onexit cleanup

# 1. Create a pool

# The pool is backed by a single file vdev of twice the minimum vdev size.
FILEDEV="$TEST_BASE_DIR/filedev.$$"
log_must truncate -s $(($MINVDEVSIZE * 2)) $FILEDEV
log_must create_pool $TESTPOOL1 $FILEDEV

mntpnt=$(get_prop mountpoint $TESTPOOL1)

# 2. Write a file to the pool, while also toggling the queue_io property on / off.

# fio is started in the background so the property can be toggled while the
# direct writes are still being issued.
log_must eval "fio --filename=$mntpnt/foobar --name=write-file \
--rw=write --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
--ioengine=sync --runtime=10 &"

ITERATIONS=30

for i in $(seq $ITERATIONS); do
log_must toggle_queue_io
done;
# NOTE(review): a bare `wait` exits 0 regardless of the job's status, and
# log_must on a backgrounded command only checks that it was launched, so an
# fio failure does not fail the test -- confirm this is intended.
wait

# 3. Start reading from the file, while also toggling the queue_io property on / off.

# Same pattern as the write phase, but with a time-based direct read job.
log_must eval "fio --filename=$mntpnt/foobar --name=read-file \
--rw=read --size=$MINVDEVSIZE --bs=128k --numjobs=1 --direct=1 \
--ioengine=sync --time_based --runtime=10 &"

for i in $(seq $ITERATIONS); do
log_must toggle_queue_io
done;
wait

log_pass "Toggling vdev queue_io property while reading from vdev does not cause panic"