From 984f20f3dfc134285a96bd2cfa35e45343a297ed Mon Sep 17 00:00:00 2001 From: "Charles L. Hedrick" Date: Mon, 1 Jul 2024 15:33:24 -0400 Subject: [PATCH] Add module parameter to disable prefetch in zfs_readdir Add parameter zfs_readdir_dnode_prefetch_limit, defaulting to 0, to control whether zfs_readdir prefetches metadata for the objects it looks at when reading a directory. If zero, metadata is prefetched for all directory entries. If non-zero, metadata is prefetched only if the directory has no more entries than this. Setting it to non-0 can be important for NFS servers with directories containing many subdirectories. Signed-off-by: Charles Hedrick Co-authored-by: Chris Siebenmann --- man/man4/zfs.4 | 12 ++++++++++++ module/os/linux/zfs/zfs_vnops_os.c | 9 +++++++++ 2 files changed, 21 insertions(+) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index 3f7485fa78c..c12f838c5b8 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -1774,6 +1774,18 @@ intact. Unlike predictive prefetch, prescient prefetch never issues I/O that ends up not being needed, so it can't hurt performance. . +.It Sy zfs_readdir_dnode_prefetch_limit Ns = Ns Sy 0 Pq ulong +Disable prefetches in readdir for large directories. +(Normally off) +When readdir searches a directory, it normally prefetches metadata for +all objects in the directory it checks, even if it's just +looking for a single object. +Setting this to a non-zero value disables that prefetching for directories +with more entries than that value. +Disabling it for large directories can greatly lower CPU usage on NFS servers where directories +have a very large number of subdirectories. +A reasonable value would be 20000. +. .It Sy zfs_qat_checksum_disable Ns = Ns Sy 0 Ns | Ns 1 Pq int Disable QAT hardware acceleration for SHA256 checksums. 
May be unset after the ZFS modules have been loaded to initialize the QAT diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c index 1cecad9f775..efe2371fae3 100644 --- a/module/os/linux/zfs/zfs_vnops_os.c +++ b/module/os/linux/zfs/zfs_vnops_os.c @@ -1504,6 +1504,7 @@ zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd, cred_t *cr, * We use 0 for '.', and 1 for '..'. If this is the root of the filesystem, * we use the offset 2 for the '.zfs' directory. */ +static ulong_t zfs_readdir_dnode_prefetch_limit = 0UL; int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) { @@ -1537,6 +1538,9 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr) os = zfsvfs->z_os; offset = ctx->pos; prefetch = zp->z_zn_prefetch; + if (zfs_readdir_dnode_prefetch_limit && + zp->z_size > zfs_readdir_dnode_prefetch_limit) + prefetch = B_FALSE; /* * Initialize the iterator cursor. @@ -4252,4 +4256,9 @@ EXPORT_SYMBOL(zfs_map); /* CSTYLED */ module_param(zfs_delete_blocks, ulong, 0644); MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async"); + +/* CSTYLED */ +module_param(zfs_readdir_dnode_prefetch_limit, ulong, 0644); +MODULE_PARM_DESC(zfs_readdir_dnode_prefetch_limit, + "No zfs_readdir prefetch if non-zero and size > this"); #endif