From 5268de78e1e61c828174cb2ac7d9c566a97cce69 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 8 Apr 2024 21:36:56 +0200 Subject: [PATCH 01/14] dm-crypt: add the optional "high_priority" flag When WQ_HIGHPRI was used for the dm-crypt kcryptd workqueue it was reported that dm-crypt performs badly when the system is loaded[1]. Because of reports of audio skipping, dm-crypt stopped using WQ_HIGHPRI with commit f612b2132db5 ("Revert "dm crypt: use WQ_HIGHPRI for the IO and crypt workqueues""). But it has since been determined that WQ_HIGHPRI provides improved performance (with reduced latency) for high-end systems with far more resources than the laptop/desktop systems whose users suffered from the use of WQ_HIGHPRI. As such, add an option "high_priority" that allows the use of WQ_HIGHPRI for dm-crypt's workqueues and also sets the write_thread to nice level MIN_NICE (-20). This commit makes it optional, so that normal users won't be harmed by it. [1] https://listman.redhat.com/archives/dm-devel/2023-February/053410.html Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- .../admin-guide/device-mapper/dm-crypt.rst | 5 +++ drivers/md/dm-crypt.c | 35 +++++++++++++------ 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index aa2d04d95df6c..41f5f57f00ebf 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -113,6 +113,11 @@ same_cpu_crypt The default is to use an unbound workqueue so that encryption work is automatically balanced between available CPUs. +high_priority + Set dm-crypt workqueues and the writer thread to high priority. This + improves throughput and latency of dm-crypt while degrading general + responsiveness of the system. + submit_from_crypt_cpus Disable offloading writes to a separate thread after encryption. There are some situations where offloading write bios from the diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9a74c6316c5da..eabc576d8d28f 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -137,9 +137,9 @@ struct iv_elephant_private { * and encrypts / decrypts at the same time.
*/ enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID, - DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD, - DM_CRYPT_NO_READ_WORKQUEUE, DM_CRYPT_NO_WRITE_WORKQUEUE, - DM_CRYPT_WRITE_INLINE }; + DM_CRYPT_SAME_CPU, DM_CRYPT_HIGH_PRIORITY, + DM_CRYPT_NO_OFFLOAD, DM_CRYPT_NO_READ_WORKQUEUE, + DM_CRYPT_NO_WRITE_WORKQUEUE, DM_CRYPT_WRITE_INLINE }; enum cipher_flags { CRYPT_MODE_INTEGRITY_AEAD, /* Use authenticated mode for cipher */ @@ -3134,7 +3134,7 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar struct crypt_config *cc = ti->private; struct dm_arg_set as; static const struct dm_arg _args[] = { - {0, 8, "Invalid number of feature args"}, + {0, 9, "Invalid number of feature args"}, }; unsigned int opt_params, val; const char *opt_string, *sval; @@ -3161,6 +3161,8 @@ static int crypt_ctr_optional(struct dm_target *ti, unsigned int argc, char **ar else if (!strcasecmp(opt_string, "same_cpu_crypt")) set_bit(DM_CRYPT_SAME_CPU, &cc->flags); + else if (!strcasecmp(opt_string, "high_priority")) + set_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags); else if (!strcasecmp(opt_string, "submit_from_crypt_cpus")) set_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); @@ -3232,6 +3234,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) const char *devname = dm_table_device_name(ti->table); int key_size; unsigned int align_mask; + unsigned int common_wq_flags; unsigned long long tmpll; int ret; size_t iv_size_padding, additional_req_size; @@ -3399,19 +3402,25 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ret = -ENOMEM; - cc->io_queue = alloc_workqueue("kcryptd_io/%s", WQ_MEM_RECLAIM, 1, devname); + common_wq_flags = WQ_MEM_RECLAIM; + if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags)) + common_wq_flags |= WQ_HIGHPRI; + + cc->io_queue = alloc_workqueue("kcryptd_io/%s", common_wq_flags, 1, devname); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } - if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) - cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, + if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) { + cc->crypt_queue = alloc_workqueue("kcryptd/%s", + common_wq_flags | WQ_CPU_INTENSIVE, 1, devname); - else + } else { cc->crypt_queue = alloc_workqueue("kcryptd/%s", - WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND, + common_wq_flags | WQ_CPU_INTENSIVE | WQ_UNBOUND, num_online_cpus(), devname); + } if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; goto bad; @@ -3427,6 +3436,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Couldn't spawn write thread"; goto bad; } + if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags)) + set_user_nice(cc->write_thread, MIN_NICE); ti->num_flush_bios = 1; ti->limit_swap_bios = true; @@ -3547,6 +3558,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, num_feature_args += !!ti->num_discard_bios; num_feature_args += test_bit(DM_CRYPT_SAME_CPU, &cc->flags); + num_feature_args += test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags); num_feature_args += test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags); @@ -3560,6 +3572,8 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" allow_discards"); if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) DMEMIT(" same_cpu_crypt"); + if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags)) + DMEMIT(" high_priority"); 
if (test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) DMEMIT(" submit_from_crypt_cpus"); if (test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) @@ -3579,6 +3593,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT_TARGET_NAME_VERSION(ti->type); DMEMIT(",allow_discards=%c", ti->num_discard_bios ? 'y' : 'n'); DMEMIT(",same_cpu_crypt=%c", test_bit(DM_CRYPT_SAME_CPU, &cc->flags) ? 'y' : 'n'); + DMEMIT(",high_priority=%c", test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags) ? 'y' : 'n'); DMEMIT(",submit_from_crypt_cpus=%c", test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags) ? 'y' : 'n'); DMEMIT(",no_read_workqueue=%c", test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags) ? @@ -3706,7 +3721,7 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type crypt_target = { .name = "crypt", - .version = {1, 25, 0}, + .version = {1, 26, 0}, .module = THIS_MODULE, .ctr = crypt_ctr, .dtr = crypt_dtr, From 2285e1496dc68787a626145cfb0904a1b6cc5501 Mon Sep 17 00:00:00 2001 From: yangerkun Date: Tue, 11 Jul 2023 14:11:29 +0800 Subject: [PATCH 02/14] dm-crypt: export sysfs of all workqueues Under heavy IO load, encrypt/decrypt work can occupy all of the CPUs, which may lead to poor performance for other services. So the improved visibility and control over dm-crypt workqueues, as offered by commit a2b8b2d97567 ("dm crypt: export sysfs of kcryptd workqueue"), seems necessary. By exporting dm-crypt's workqueues in sysfs, entries like cpumask and max_active can be used to limit the CPU usage of encrypt/decrypt work. However, commit a2b8b2d97567 did not consider that a DM table reload calls .ctr before .dtr, so reloading a dm-crypt table failed because the same sysfs name was still present. This is why commit 48b0777cd93d ("Revert "dm crypt: export sysfs of kcryptd workqueue"") reverted it. Reintroduce the use of WQ_SYSFS, and use it for both the IO and crypt workqueues, but make the workqueue names include a unique id (via ida) so that old and new sysfs entries can coexist across a reload.
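With WQ_SYSFS, each workqueue's attributes then appear under /sys/devices/virtual/workqueue/. For example (illustrative paths; the numeric suffix depends on ida allocation order, and cpumask is only exposed for unbound workqueues):

  /sys/devices/virtual/workqueue/kcryptd-<devname>-<id>/max_active
  /sys/devices/virtual/workqueue/kcryptd-<devname>-<id>/cpumask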
Signed-off-by: yangerkun Acked-by: Tejun Heo Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index eabc576d8d28f..5bfa357601672 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -47,6 +47,8 @@ #define DM_MSG_PREFIX "crypt" +static DEFINE_IDA(workqueue_ida); + /* * context holding the current state of a multi-part conversion */ @@ -184,6 +186,7 @@ struct crypt_config { struct crypto_aead **tfms_aead; } cipher_tfm; unsigned int tfms_count; + int workqueue_id; unsigned long cipher_flags; /* @@ -2771,6 +2774,9 @@ static void crypt_dtr(struct dm_target *ti) if (cc->crypt_queue) destroy_workqueue(cc->crypt_queue); + if (cc->workqueue_id) + ida_free(&workqueue_ida, cc->workqueue_id); + crypt_free_tfms(cc); bioset_exit(&cc->bs); @@ -3232,7 +3238,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct crypt_config *cc; const char *devname = dm_table_device_name(ti->table); - int key_size; + int key_size, wq_id; unsigned int align_mask; unsigned int common_wq_flags; unsigned long long tmpll; @@ -3401,25 +3407,33 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) cc->tag_pool_max_sectors <<= cc->sector_shift; } + wq_id = ida_alloc_min(&workqueue_ida, 1, GFP_KERNEL); + if (wq_id < 0) { + ti->error = "Couldn't get workqueue id"; + ret = wq_id; + goto bad; + } + cc->workqueue_id = wq_id; + ret = -ENOMEM; - common_wq_flags = WQ_MEM_RECLAIM; + common_wq_flags = WQ_MEM_RECLAIM | WQ_SYSFS; if (test_bit(DM_CRYPT_HIGH_PRIORITY, &cc->flags)) common_wq_flags |= WQ_HIGHPRI; - cc->io_queue = alloc_workqueue("kcryptd_io/%s", common_wq_flags, 1, devname); + cc->io_queue = alloc_workqueue("kcryptd_io-%s-%d", common_wq_flags, 1, devname, wq_id); if (!cc->io_queue) { ti->error = "Couldn't create kcryptd io queue"; goto bad; } if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags)) { - cc->crypt_queue = alloc_workqueue("kcryptd/%s", + cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d", common_wq_flags | WQ_CPU_INTENSIVE, - 1, devname); + 1, devname, wq_id); } else { - cc->crypt_queue = alloc_workqueue("kcryptd/%s", + cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d", common_wq_flags | WQ_CPU_INTENSIVE | WQ_UNBOUND, - num_online_cpus(), devname); + num_online_cpus(), devname, wq_id); } if (!cc->crypt_queue) { ti->error = "Couldn't create kcryptd queue"; From 7560680c8d1e810826d048a61d35668541dce038 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 11 Apr 2024 15:08:05 -0400 Subject: [PATCH 03/14] dm-crypt: stop constraining max_segment_size to PAGE_SIZE This change effectively reverts commit 586b286b110e ("dm crypt: constrain crypt device's max_segment_size to PAGE_SIZE") and relies on block core's late bio-splitting to ensure that dm-crypt's encryption bios are split accordingly if they exceed the underlying device's limits (e.g. max_segment_size). Commit 586b286b110e was applied as a 4.3 fix for the benefit of stable@ kernels 4.0+ just after block core's late bio-splitting was introduced in 4.3 with commit 54efd50bfd873 ("block: make generic_make_request handle arbitrarily sized bios"). Given block core's late bio-splitting it is past time that dm-crypt make use of it. 
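For reference, that late splitting happens at bio submission time; a rough sketch of the call chain in current kernels (exact names vary by kernel version):

  blk_mq_submit_bio()
    __bio_split_to_limits()  /* splits any bio that exceeds the queue's limits, e.g. max_segment_size */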
Also, given the recent need to revert meaningful progress that was attempted during the 6.9 merge window (see commit bff4b74625fe Revert "dm: use queue_limits_set") this change allows DM core to safely make use of queue_limits_set() without risk of breaking dm-crypt on NVMe. Though it should be noted this commit isn't a prereq for reinstating DM core's use of queue_limits_set() because blk_validate_limits() was made less strict with commit b561ea56a264 ("block: allow device to have both virt_boundary_mask and max segment size"). Reviewed-by: Christoph Hellwig Reviewed-by: Mikulas Patocka Reviewed-by: Ming Lei Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5bfa357601672..f43a2c0b3d77c 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1656,8 +1656,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone); /* * Generate a new unfragmented bio with the given size - * This should never violate the device limitations (but only because - * max_segment_size is being constrained to PAGE_SIZE). + * This should never violate the device limitations (but if it did then block + * core should split the bio as needed). * * This function may be called concurrently. If we allocate from the mempool * concurrently, there is a possibility of deadlock. For example, if we have @@ -3717,14 +3717,6 @@ static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits) { struct crypt_config *cc = ti->private; - /* - * Unfortunate constraint that is required to avoid the potential - * for exceeding underlying device's max_segments limits -- due to - * crypt_alloc_buffer() possibly allocating pages for the encryption - * bio that are not as physically contiguous as the original bio. - */ - limits->max_segment_size = PAGE_SIZE; - limits->logical_block_size = max_t(unsigned int, limits->logical_block_size, cc->sector_size); limits->physical_block_size = From 1c0e720228ad1c63bb487cdcead2558353b5a067 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Feb 2024 14:56:42 -0800 Subject: [PATCH 04/14] dm: use queue_limits_set Use queue_limits_set which validates the limits and takes care of updating the readahead settings instead of directly assigning them to the queue. For that make sure all limits are actually updated before the assignment. 
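An abridged sketch of the resulting pattern in dm_table_set_restrictions() (see the diff below): finish mutating the caller-owned limits, then commit them in one validated step:

  if (!dm_table_supports_discards(t)) {
          limits->max_hw_discard_sectors = 0;
          ...
  }
  ...
  r = queue_limits_set(q, limits);
  if (r)
          return r;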
Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 41f1d731ae5ac..88114719fe187 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1963,26 +1963,27 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, bool wc = false, fua = false; int r; - /* - * Copy table's limits to the DM device's request_queue - */ - q->limits = *limits; - if (dm_table_supports_nowait(t)) blk_queue_flag_set(QUEUE_FLAG_NOWAIT, q); else blk_queue_flag_clear(QUEUE_FLAG_NOWAIT, q); if (!dm_table_supports_discards(t)) { - q->limits.max_discard_sectors = 0; - q->limits.max_hw_discard_sectors = 0; - q->limits.discard_granularity = 0; - q->limits.discard_alignment = 0; - q->limits.discard_misaligned = 0; + limits->max_hw_discard_sectors = 0; + limits->discard_granularity = 0; + limits->discard_alignment = 0; + limits->discard_misaligned = 0; } + if (!dm_table_supports_write_zeroes(t)) + limits->max_write_zeroes_sectors = 0; + if (!dm_table_supports_secure_erase(t)) - q->limits.max_secure_erase_sectors = 0; + limits->max_secure_erase_sectors = 0; + + r = queue_limits_set(q, limits); + if (r) + return r; if (dm_table_supports_flush(t, (1UL << QUEUE_FLAG_WC))) { wc = true; @@ -2007,9 +2008,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - if (!dm_table_supports_write_zeroes(t)) - q->limits.max_write_zeroes_sectors = 0; - dm_table_verify_integrity(t); /* @@ -2047,7 +2045,6 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } dm_update_crypto_profile(q, t); - disk_update_readahead(t->md->disk); /* * Check for request-based device is left to From 83637d9017b22a5e11ada9f44ba776beb807222b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 12 Apr 2024 10:58:05 -0400 Subject: [PATCH 05/14] dm-crypt: don't set WQ_CPU_INTENSIVE for WQ_UNBOUND crypt_queue Fix crypt_queue's use of WQ_UNBOUND to _not_ use WQ_CPU_INTENSIVE because it is meaningless with WQ_UNBOUND. Signed-off-by: Mike Snitzer --- drivers/md/dm-crypt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index f43a2c0b3d77c..1b7a97cc37794 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3431,8 +3431,12 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) common_wq_flags | WQ_CPU_INTENSIVE, 1, devname, wq_id); } else { + /* + * While crypt_queue is certainly CPU intensive, the use of + * WQ_CPU_INTENSIVE is meaningless with WQ_UNBOUND. + */ cc->crypt_queue = alloc_workqueue("kcryptd-%s-%d", - common_wq_flags | WQ_CPU_INTENSIVE | WQ_UNBOUND, + common_wq_flags | WQ_UNBOUND, num_online_cpus(), devname, wq_id); } if (!cc->crypt_queue) { From 8d24790ed08ab4e619ce58ed4a1b353ab77ffdc5 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 7 May 2024 17:16:23 -0400 Subject: [PATCH 06/14] dm-delay: fix workqueue delay_timer race delay_timer could be pending when delay_dtr() is called. It needs to be shut down before kdelayd_wq is destroyed, so it won't try queueing more work to kdelayd_wq while that's getting destroyed. Also the del_timer_sync() call in delay_presuspend() doesn't protect against the timer getting immediately rearmed by the queued call to flush_delayed_bios(), but there's no real harm if that does happen. 
timer_delete() is less work, and is basically just as likely to stop a pointless call to flush_delayed_bios(). Fixes: 26b9f228703f ("dm: delay target") Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 5eabdb06c6498..eec0daa4b227a 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -154,8 +154,10 @@ static void delay_dtr(struct dm_target *ti) { struct delay_c *dc = ti->private; - if (dc->kdelayd_wq) + if (dc->kdelayd_wq) { + timer_shutdown_sync(&dc->delay_timer); destroy_workqueue(dc->kdelayd_wq); + } if (dc->read.dev) dm_put_device(ti, dc->read.dev); @@ -335,7 +337,7 @@ static void delay_presuspend(struct dm_target *ti) mutex_unlock(&delayed_bios_lock); if (!delay_is_fast(dc)) - del_timer_sync(&dc->delay_timer); + timer_delete(&dc->delay_timer); flush_delayed_bios(dc, true); } From d14646f23300a5fc85be867bafdc0702c2002789 Mon Sep 17 00:00:00 2001 From: Joel Colledge Date: Mon, 6 May 2024 09:25:23 +0200 Subject: [PATCH 07/14] dm-delay: fix hung task introduced by kthread mode If the worker thread is never woken by a bio, it is never woken at all: kthread_create() leaves the new thread sleeping in TASK_UNINTERRUPTIBLE state until its first wake-up, which is exactly the state the hung task check watches for, so the check triggers. This occurs, for instance, when no bios are submitted. Also, when a delay of 0 is configured, delay_bio() returns without waking the worker. Prevent the hung task check from triggering by creating the thread with kthread_run(), which wakes it immediately, instead of using kthread_create() directly. Fixes: 70bbeb29fab0 ("dm delay: for short delays, use kthread instead of timers and wq") Signed-off-by: Joel Colledge Reviewed-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index eec0daa4b227a..4ba12d5369949 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -269,8 +269,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) * In case of small requested delays, use kthread instead of * timers and workqueue to achieve better latency. */ - dc->worker = kthread_create(&flush_worker_fn, dc, "dm-delay-flush-worker"); + dc->worker = kthread_run(&flush_worker_fn, dc, "dm-delay-flush-worker"); if (IS_ERR(dc->worker)) { ret = PTR_ERR(dc->worker); dc->worker = NULL; From 64eb88d6caee2c8eb806a68dab3f184f14f818a4 Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Mon, 6 May 2024 17:55:44 -0400 Subject: [PATCH 08/14] dm-delay: fix max_delay calculations delay_ctr() pointlessly compared max_delay in cases where multiple delay classes were initialized identically. Also, when write delays were configured differently than read delays, delay_ctr() never compared their value against max_delay. Fix these issues.
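For example, with an illustrative table that pairs a 0 ms read delay with a 1000 ms write delay:

  0 10000000 delay /dev/sdX 0 0 /dev/sdX 0 1000

the 1000 ms write delay was never factored into max_delay, so the kthread-vs-timer mode decision driven by max_delay was made from the read delay alone.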
Fixes: 70bbeb29fab0 ("dm delay: for short delays, use kthread instead of timers and wq") Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 4ba12d5369949..2ac43d1f1b92c 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -242,19 +242,18 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ret = delay_class_ctr(ti, &dc->flush, argv); if (ret) goto bad; - max_delay = max(max_delay, dc->write.delay); - max_delay = max(max_delay, dc->flush.delay); goto out; } ret = delay_class_ctr(ti, &dc->write, argv + 3); if (ret) goto bad; + max_delay = max(max_delay, dc->write.delay); + if (argc == 6) { ret = delay_class_ctr(ti, &dc->flush, argv + 3); if (ret) goto bad; - max_delay = max(max_delay, dc->flush.delay); goto out; } From c542ee149230c4c3fc086feae608230e7aa97fcf Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 7 May 2024 17:16:24 -0400 Subject: [PATCH 09/14] dm-delay: change locking to avoid contention The delayed_bios list is protected by one mutex shared by all dm-delay devices. This mutex must be held whenever a bio is added or expired bios are removed from the list. Since a large number of expired bios could be on the list, flush_delayed_bios() can schedule while holding the mutex. This means a flush_delayed_bios() call on any dm-delay device can slow down delay_map() calls on any other dm-delay device. To keep dm-delay devices from slowing each other down, and to keep the processing of delayed bios from slowing the addition of new ones, the global mutex has been removed, and each dm-delay device now has two locks. delayed_bios_lock is a spinlock that must be held whenever the delayed_bios list is accessed. process_bios_lock is a mutex that must be held whenever a process has temporarily pulled bios off the delayed_bios list to check which ones should be processed. It must be held until all the bios that won't be processed are returned to the list. This is what flush_delayed_bios() now does. The mutex is necessary to guarantee that delay_presuspend() sees the entire list of delayed bios when it calls flush_delayed_bios().
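Abridged from the flush_delayed_bios() change below, the resulting pattern is:

  mutex_lock(&dc->process_bios_lock);
  spin_lock(&dc->delayed_bios_lock);
  list_replace_init(&dc->delayed_bios, &local_list);
  spin_unlock(&dc->delayed_bios_lock);
  /* walk local_list without holding the spinlock */
  spin_lock(&dc->delayed_bios_lock);
  list_splice(&local_list, &dc->delayed_bios);  /* return bios that aren't due yet */
  spin_unlock(&dc->delayed_bios_lock);
  mutex_unlock(&dc->process_bios_lock);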
Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 2ac43d1f1b92c..da3f8131d52c2 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -29,6 +29,8 @@ struct delay_class { struct delay_c { struct timer_list delay_timer; struct mutex timer_lock; + struct mutex process_bios_lock; /* hold while removing bios to be processed from list */ + spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */ struct workqueue_struct *kdelayd_wq; struct work_struct flush_expired_bios; struct list_head delayed_bios; @@ -49,8 +51,6 @@ struct dm_delay_info { unsigned long expires; }; -static DEFINE_MUTEX(delayed_bios_lock); - static void handle_delayed_timer(struct timer_list *t) { struct delay_c *dc = from_timer(dc, t, delay_timer); @@ -89,12 +89,16 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all) { struct dm_delay_info *delayed, *next; struct bio_list flush_bio_list; + LIST_HEAD(local_list); unsigned long next_expires = 0; bool start_timer = false; bio_list_init(&flush_bio_list); - mutex_lock(&delayed_bios_lock); - list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) { + mutex_lock(&dc->process_bios_lock); + spin_lock(&dc->delayed_bios_lock); + list_replace_init(&dc->delayed_bios, &local_list); + spin_unlock(&dc->delayed_bios_lock); + list_for_each_entry_safe(delayed, next, &local_list, list) { cond_resched(); if (flush_all || time_after_eq(jiffies, delayed->expires)) { struct bio *bio = dm_bio_from_per_bio_data(delayed, @@ -114,7 +118,10 @@ static void flush_delayed_bios(struct delay_c *dc, bool flush_all) } } } - mutex_unlock(&delayed_bios_lock); + spin_lock(&dc->delayed_bios_lock); + list_splice(&local_list, &dc->delayed_bios); + spin_unlock(&dc->delayed_bios_lock); + mutex_unlock(&dc->process_bios_lock); if (start_timer) queue_timeout(dc, next_expires); @@ -128,13 +135,13 @@ static int flush_worker_fn(void *data) while (!kthread_should_stop()) { flush_delayed_bios(dc, false); - mutex_lock(&delayed_bios_lock); + spin_lock(&dc->delayed_bios_lock); if (unlikely(list_empty(&dc->delayed_bios))) { set_current_state(TASK_INTERRUPTIBLE); - mutex_unlock(&delayed_bios_lock); + spin_unlock(&dc->delayed_bios_lock); schedule(); } else { - mutex_unlock(&delayed_bios_lock); + spin_unlock(&dc->delayed_bios_lock); cond_resched(); } } @@ -168,6 +175,7 @@ static void delay_dtr(struct dm_target *ti) if (dc->worker) kthread_stop(dc->worker); + mutex_destroy(&dc->process_bios_lock); mutex_destroy(&dc->timer_lock); kfree(dc); @@ -227,6 +235,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = dc; INIT_LIST_HEAD(&dc->delayed_bios); mutex_init(&dc->timer_lock); + mutex_init(&dc->process_bios_lock); + spin_lock_init(&dc->delayed_bios_lock); dc->may_delay = true; dc->argc = argc; @@ -309,14 +319,14 @@ static int delay_bio(struct delay_c *dc, struct delay_class *c, struct bio *bio) delayed->context = dc; delayed->expires = expires = jiffies + msecs_to_jiffies(c->delay); - mutex_lock(&delayed_bios_lock); + spin_lock(&dc->delayed_bios_lock); if (unlikely(!dc->may_delay)) { - mutex_unlock(&delayed_bios_lock); + spin_unlock(&dc->delayed_bios_lock); return DM_MAPIO_REMAPPED; } c->ops++; list_add_tail(&delayed->list, &dc->delayed_bios); - mutex_unlock(&delayed_bios_lock); + spin_unlock(&dc->delayed_bios_lock); if (delay_is_fast(dc)) 
wake_up_process(dc->worker); @@ -330,9 +340,9 @@ static void delay_presuspend(struct dm_target *ti) { struct delay_c *dc = ti->private; - mutex_lock(&delayed_bios_lock); + spin_lock(&dc->delayed_bios_lock); dc->may_delay = false; - mutex_unlock(&delayed_bios_lock); + spin_unlock(&dc->delayed_bios_lock); if (!delay_is_fast(dc)) timer_delete(&dc->delay_timer); From 8b21ac87d550acc4f6207764fed0cf6f0e3966cd Mon Sep 17 00:00:00 2001 From: Benjamin Marzinski Date: Tue, 7 May 2024 17:16:25 -0400 Subject: [PATCH 10/14] dm-delay: remove timer_lock Instead of manually checking the timer details in queue_timeout(), call timer_reduce() to start the timer or reduce the expiration time. This avoids needing a lock. Signed-off-by: Benjamin Marzinski Signed-off-by: Mike Snitzer --- drivers/md/dm-delay.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index da3f8131d52c2..08f6387620c1c 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -28,7 +28,6 @@ struct delay_class { struct delay_c { struct timer_list delay_timer; - struct mutex timer_lock; struct mutex process_bios_lock; /* hold while removing bios to be processed from list */ spinlock_t delayed_bios_lock; /* hold on all accesses to delayed_bios list */ struct workqueue_struct *kdelayd_wq; @@ -60,12 +59,7 @@ static void handle_delayed_timer(struct timer_list *t) static void queue_timeout(struct delay_c *dc, unsigned long expires) { - mutex_lock(&dc->timer_lock); - - if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires) - mod_timer(&dc->delay_timer, expires); - - mutex_unlock(&dc->timer_lock); + timer_reduce(&dc->delay_timer, expires); } static inline bool delay_is_fast(struct delay_c *dc) @@ -176,7 +170,6 @@ static void delay_dtr(struct dm_target *ti) kthread_stop(dc->worker); mutex_destroy(&dc->process_bios_lock); - mutex_destroy(&dc->timer_lock); kfree(dc); } @@ -234,7 +227,6 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = dc; INIT_LIST_HEAD(&dc->delayed_bios); - mutex_init(&dc->timer_lock); mutex_init(&dc->process_bios_lock); spin_lock_init(&dc->delayed_bios_lock); dc->may_delay = true; From 69381cf88a8dfa0ab27fb801b78be813e7e8fb80 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 20 May 2024 16:48:31 +0200 Subject: [PATCH 11/14] dm-integrity: set discard_granularity to logical block size dm-integrity could set discard_granularity lower than the logical block size. This could result in failures when sending discard requests to dm-integrity. This fix is needed for kernels prior to 6.10. 
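For example, with 4096-byte integrity blocks (ic->sectors_per_block = 8), the fix below sets:

  limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT;  /* 8 << 9 = 4096 */

so discard_granularity can no longer fall below the 4096-byte logical block size.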
Signed-off-by: Mikulas Patocka Reported-by: Eric Wheeler Cc: stable@vger.kernel.org # <= 6.9 Signed-off-by: Mike Snitzer --- drivers/md/dm-integrity.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index 7f3dc8ee6ab8d..417fddebe367a 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -3492,6 +3492,7 @@ static void dm_integrity_io_hints(struct dm_target *ti, struct queue_limits *lim limits->physical_block_size = ic->sectors_per_block << SECTOR_SHIFT; blk_limits_io_min(limits, ic->sectors_per_block << SECTOR_SHIFT); limits->dma_alignment = limits->logical_block_size - 1; + limits->discard_granularity = ic->sectors_per_block << SECTOR_SHIFT; } limits->max_integrity_segments = USHRT_MAX; } From 825d8bbd2f32cb229c3b6653bd454832c3c20acb Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Mon, 20 May 2024 13:34:06 -0400 Subject: [PATCH 12/14] dm: always manage discard support in terms of max_hw_discard_sectors Commit 4f563a64732d ("block: add a max_user_discard_sectors queue limit") changed block core to set max_discard_sectors to: min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors) Since commit 1c0e720228ad ("dm: use queue_limits_set") it was reported dm-thinp was failing in a few fstests (generic/347 and generic/405) with the first WARN_ON_ONCE in dm_cell_key_has_valid_range() being reported, e.g.: WARNING: CPU: 1 PID: 30 at drivers/md/dm-bio-prison-v1.c:128 dm_cell_key_has_valid_range+0x3d/0x50 blk_set_stacking_limits() sets max_user_discard_sectors to UINT_MAX, so given how block core now sets max_discard_sectors (detailed above) it follows that blk_stack_limits() stacks up the underlying device's max_hw_discard_sectors and max_discard_sectors is set to match it. If max_hw_discard_sectors exceeds dm's BIO_PRISON_MAX_RANGE, then dm_cell_key_has_valid_range() will trigger the warning with: WARN_ON_ONCE(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE) Aside from this warning, the discard will fail. Fix this and other DM issues by governing discard support in terms of max_hw_discard_sectors instead of max_discard_sectors. Reported-by: Theodore Ts'o Fixes: 1c0e720228ad ("dm: use queue_limits_set") Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-target.c | 5 ++--- drivers/md/dm-clone-target.c | 4 ++-- drivers/md/dm-log-writes.c | 2 +- drivers/md/dm-snap.c | 2 +- drivers/md/dm-target.c | 1 - drivers/md/dm-thin.c | 4 ++-- drivers/md/dm-zero.c | 1 - drivers/md/dm-zoned-target.c | 1 - drivers/md/dm.c | 2 +- 9 files changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 911f73f7ebbaa..1f0bc11732303 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -3394,8 +3394,8 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) if (!cache->features.discard_passdown) { /* No passdown is done so setting own virtual limits */ - limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, - cache->origin_sectors); + limits->max_hw_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024, + cache->origin_sectors); limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT; return; } @@ -3404,7 +3404,6 @@ static void set_discard_limits(struct cache *cache, struct queue_limits *limits) * cache_iterate_devices() is stacking both origin and fast device limits * but discards aren't passed to fast device, so inherit origin's limits. 
*/ - limits->max_discard_sectors = origin_limits->max_discard_sectors; limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors; limits->discard_granularity = origin_limits->discard_granularity; limits->discard_alignment = origin_limits->discard_alignment; diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c index 94b2fc33f64be..2332d97981412 100644 --- a/drivers/md/dm-clone-target.c +++ b/drivers/md/dm-clone-target.c @@ -2050,7 +2050,8 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits) if (!test_bit(DM_CLONE_DISCARD_PASSDOWN, &clone->flags)) { /* No passdown is done so we set our own virtual limits */ limits->discard_granularity = clone->region_size << SECTOR_SHIFT; - limits->max_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, clone->region_size); + limits->max_hw_discard_sectors = round_down(UINT_MAX >> SECTOR_SHIFT, + clone->region_size); return; } @@ -2059,7 +2060,6 @@ static void set_discard_limits(struct clone *clone, struct queue_limits *limits) * device limits but discards aren't passed to the source device, so * inherit destination's limits. */ - limits->max_discard_sectors = dest_limits->max_discard_sectors; limits->max_hw_discard_sectors = dest_limits->max_hw_discard_sectors; limits->discard_granularity = dest_limits->discard_granularity; limits->discard_alignment = dest_limits->discard_alignment; diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index f17a6cf2284ec..8d7df8303d0a1 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -871,7 +871,7 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit if (!bdev_max_discard_sectors(lc->dev->bdev)) { lc->device_supports_discard = false; limits->discard_granularity = lc->sectorsize; - limits->max_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); + limits->max_hw_discard_sectors = (UINT_MAX >> SECTOR_SHIFT); } limits->logical_block_size = bdev_logical_block_size(lc->dev->bdev); limits->physical_block_size = bdev_physical_block_size(lc->dev->bdev); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0ace06d1bee38..f40c18da40000 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2410,7 +2410,7 @@ static void snapshot_io_hints(struct dm_target *ti, struct queue_limits *limits) /* All discards are split on chunk_size boundary */ limits->discard_granularity = snap->store->chunk_size; - limits->max_discard_sectors = snap->store->chunk_size; + limits->max_hw_discard_sectors = snap->store->chunk_size; up_read(&_origins_lock); } diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 0c4efb0bef8a9..652627aea11b6 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -249,7 +249,6 @@ static int io_err_iterate_devices(struct dm_target *ti, static void io_err_io_hints(struct dm_target *ti, struct queue_limits *limits) { - limits->max_discard_sectors = UINT_MAX; limits->max_hw_discard_sectors = UINT_MAX; limits->discard_granularity = 512; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4793ad2aa1f7e..e0528a4f809c4 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -4100,7 +4100,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits) if (pt->adjusted_pf.discard_enabled) { disable_discard_passdown_if_not_supported(pt); if (!pt->adjusted_pf.discard_passdown) - limits->max_discard_sectors = 0; + limits->max_hw_discard_sectors = 0; /* * The pool uses the same discard limits as the underlying data * device. 
DM core has already set this up. @@ -4497,7 +4497,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) if (pool->pf.discard_enabled) { limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; - limits->max_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE; + limits->max_hw_discard_sectors = pool->sectors_per_block * BIO_PRISON_MAX_RANGE; } } diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 3b13e6eb1aa47..9a0bb623e823f 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -61,7 +61,6 @@ static int zero_map(struct dm_target *ti, struct bio *bio) static void zero_io_hints(struct dm_target *ti, struct queue_limits *limits) { - limits->max_discard_sectors = UINT_MAX; limits->max_hw_discard_sectors = UINT_MAX; limits->discard_granularity = 512; } diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 621794a9edd65..12236e6f46f39 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1001,7 +1001,6 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits) limits->discard_alignment = 0; limits->discard_granularity = DMZ_BLOCK_SIZE; - limits->max_discard_sectors = chunk_sectors; limits->max_hw_discard_sectors = chunk_sectors; limits->max_write_zeroes_sectors = chunk_sectors; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7d0746b37c8ec..3adfc6b83c013 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1086,7 +1086,7 @@ void disable_discard(struct mapped_device *md) struct queue_limits *limits = dm_get_queue_limits(md); /* device doesn't really support DISCARD, disable it */ - limits->max_discard_sectors = 0; + limits->max_hw_discard_sectors = 0; } void disable_write_zeroes(struct mapped_device *md) From d5cfecfe7f3293a4773c4486ac92d742143e8659 Mon Sep 17 00:00:00 2001 From: Bruce Johnston Date: Tue, 9 Jul 2024 13:00:36 -0400 Subject: [PATCH 13/14] dm vdo: replace max_discard_sectors with max_hw_discard_sectors Commit 4f563a64732d ("block: add a max_user_discard_sectors queue limit") changed block core to set max_discard_sectors to: min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors) Commit 825d8bbd2f32 ("dm: always manage discard support in terms of max_hw_discard_sectors") fixed most dm targets to deal with this by replacing max_discard_sectors with max_hw_discard_sectors. Unfortunately, dm-vdo did not get fixed at that time. Fixes: 825d8bbd2f32 ("dm: always manage discard support in terms of max_hw_discard_sectors") Signed-off-by: Bruce Johnston Signed-off-by: Matthew Sakai Signed-off-by: Mikulas Patocka --- drivers/md/dm-vdo/dm-vdo-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-vdo/dm-vdo-target.c b/drivers/md/dm-vdo/dm-vdo-target.c index 5a4b0a927f560..af77084969e09 100644 --- a/drivers/md/dm-vdo/dm-vdo-target.c +++ b/drivers/md/dm-vdo/dm-vdo-target.c @@ -945,7 +945,7 @@ static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits) * The value is used by dm-thin to determine whether to pass down discards. The block layer * splits large discards on this boundary when this is set. */ - limits->max_discard_sectors = + limits->max_hw_discard_sectors = (vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK); /* From cf026394145b5a7bb5da17434c4a81fb3859260a Mon Sep 17 00:00:00 2001 From: Matthew Sakai Date: Tue, 16 Jul 2024 20:49:11 -0400 Subject: [PATCH 14/14] Restore CI workflow files Rebasing can destroy these, so recreate them if necessary.
Signed-off-by: Matthew Sakai --- .github/workflows/ci.yml | 97 +++++++++++++++++++++++++++++++++++ .github/workflows/notify.yml | 98 ++++++++++++++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100644 .github/workflows/ci.yml create mode 100644 .github/workflows/notify.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000000000..765bf1753d1fc --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,97 @@ +name: CI + +on: + pull_request_target: + types: [ opened, reopened, ready_for_review, synchronize ] + workflow_dispatch: + +# This allows a subsequently queued workflow run to interrupt previous runs +concurrency: + group: '${{ github.workflow }} @ ${{ github.event.pull_request.head.label || github.head_ref || github.ref }}' + cancel-in-progress: true + +jobs: + Verifying: + if: github.event.pull_request.draft == false + runs-on: ubuntu-latest + steps: + - name: Start verification + run: echo Verification Start + + # Run assignment action + - name: Assign reviewers and assignees + uses: dm-vdo/vdo-auto-assign@main + with: + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Remove all known labels + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.GITHUB_TOKEN}} + labels: untested, private testing running, private testing done, private testing failed + type: remove + + - name: Add untested label + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.VDODEVEL}} + label: untested + type: add + + Testing: + if: github.event.pull_request.draft == false + runs-on: [self-hosted, trusted] + needs: [Verifying] + steps: + - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} + path: dm-linux + + - uses: actions/checkout@v2 + with: + repository: ${{ github.event.repository.owner.login }}/vdo-devel + path: vdo-devel + + - name: Add private testing running label + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.GITHUB_TOKEN}} + label: private testing running + type: add + + - name: Start private testing + run: | + cd ${{ github.workspace }}/vdo-devel/src/ + make DMLINUX_SOURCE_DIR=${{ github.workspace }}/dm-linux dmlinux-jenkins + + - name: Copy log file to /permabit/user/bunsen/artifacts + if: failure() + run: | + REPO=${{ github.repository }} + REPO=${REPO//\//-} + cp -a ${{ github.workspace }}/vdo-devel/logs /permabit/user/bunsen/artifacts/logs.`date +'%Y%m%d%H%M'`.$REPO.${{ github.event.number }} + + - name: Remove private testing running label + if: always() + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.GITHUB_TOKEN}} + label: private testing running + type: remove + + - name: Add private testing failed + if: failure() + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.GITHUB_TOKEN}} + label: private testing failed + type: add + + - name: Add private testing success + if: success() + uses: buildsville/add-remove-label@v2.0.0 + with: + token: ${{secrets.VDODEVEL}} + label: private testing done + type: add diff --git a/.github/workflows/notify.yml b/.github/workflows/notify.yml new file mode 100644 index 0000000000000..ec624d2e81740 --- /dev/null +++ b/.github/workflows/notify.yml @@ -0,0 +1,98 @@ +name: Notify + +on: + schedule: + - cron: '0 0,2,10,12,14,16,18,20,22 * * 1-5' + workflow_dispatch: + +jobs: + notify_schedule: + env: + ORG: ${{ github.repository_owner }} + REPO: ${{ github.event.repository.name }} + runs-on: ubuntu-latest + steps: + - name: Notify PRs Needing Review + uses:
octokit/graphql-action@v2.x + id: prs_need_review + with: + query: | + query needreview($queryString:String!) { + search(type: ISSUE, query: $queryString, last: 100) { + issueCount + } + } + variables: | + queryString: "repo:${{ env.ORG }}/${{env.REPO}}, state:open, is:pr, -label:\"on hold\", review:required," + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - uses: slackapi/slack-github-action@v1.24.0 + if: ${{ env.COUNT > 0 }} + with: + # This data can be any valid JSON from a previous step in the GitHub Action + payload: | + { + "PR_MSG": "Number of PRs awaiting reviews in ${{env.REPO}} : ${{ env.COUNT }}", + "PR_LINK": "https://github.com/${{env.ORG}}/${{env.REPO}}/pulls?q=is%3Apr+is%3Aopen+-label%3A%22on%20hold%22+review%3Arequired" + } + env: + COUNT: ${{ fromJSON(steps.prs_need_review.outputs.data).search.issueCount }} + SLACK_WEBHOOK_URL: ${{ secrets.SLACKMSG }} + + - name: Notify PRs With Feedback + uses: octokit/graphql-action@v2.x + id: prs_with_feedback + with: + query: | + query withfeedback($queryString:String!) { + search(type: ISSUE, query: $queryString, last: 100) { + issueCount + } + } + variables: | + queryString: "repo:${{env.ORG}}/${{env.REPO}}, state:open, is:pr, -label:\"on hold\", review:changes_requested," + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - uses: slackapi/slack-github-action@v1.24.0 + if: ${{ env.COUNT > 0 }} + with: + # This data can be any valid JSON from a previous step in the GitHub Action + payload: | + { + "PR_MSG": "Number of PRs awaiting replies to reviews in ${{env.REPO}} : ${{ env.COUNT }}", + "PR_LINK": "https://github.com/${{env.ORG}}/${{env.REPO}}/pulls?q=is%3Apr+is%3Aopen+-label%3A%22on%20hold%22+review%3Achanges_requested" + } + env: + COUNT: ${{ fromJSON(steps.prs_with_feedback.outputs.data).search.issueCount }} + SLACK_WEBHOOK_URL: ${{ secrets.SLACKMSG }} + + + - name: Notify PRs With Approval + uses: octokit/graphql-action@v2.x + id: prs_with_approval + with: + query: | + query withapproval($queryString:String!) { + search(type: ISSUE, query: $queryString, last: 100) { + issueCount + } + } + variables: | + queryString: "repo:${{env.ORG}}/${{env.REPO}}, state:open, is:pr, -label:\"on hold\", review:approved," + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - uses: slackapi/slack-github-action@v1.24.0 + if: ${{ env.COUNT > 0 }} + with: + # This data can be any valid JSON from a previous step in the GitHub Action + payload: | + { + "PR_MSG": "Number of PRs approved in ${{env.REPO}} : ${{ env.COUNT }}", + "PR_LINK": "https://github.com/${{env.ORG}}/${{env.REPO}}/pulls?q=is%3Apr+is%3Aopen+-label%3A%22on%20hold%22+review%3Aapproved" + } + env: + COUNT: ${{ fromJSON(steps.prs_with_approval.outputs.data).search.issueCount }} + SLACK_WEBHOOK_URL: ${{ secrets.SLACKMSG }}