From cf391af511ce085e75d659e824c4b4874fe58fab Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Fri, 6 Sep 2024 19:14:36 -0400
Subject: [PATCH] bcachefs: bch2_ec_stripe_head_get() now checks for change in
 rw devices

This factors out ec_stripe_head_devs_update(), which initializes the
bitmap of devices we're allocating from, and runs it every time
c->rw_devs_change_count changes.

We also cancel pending stripes that have not yet been allocated, since
they may refer to devices that are no longer available.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/ec.c | 81 +++++++++++++++++++++++++++++++-----------------
 fs/bcachefs/ec.h |  3 ++
 2 files changed, 56 insertions(+), 28 deletions(-)

diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index 78818d8c5279..fa549f4cabf2 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -1568,10 +1568,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c)
 		bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create);
 }
 
-static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
+static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
 {
 	struct ec_stripe_new *s = h->s;
 
+	lockdep_assert_held(&h->lock);
+
 	BUG_ON(!s->allocated && !s->err);
 
 	h->s		= NULL;
@@ -1584,6 +1586,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h)
 	ec_stripe_new_put(c, s, STRIPE_REF_io);
 }
 
+static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err)
+{
+	h->s->err = err;			/* requires h->s != NULL */
+	ec_stripe_new_set_pending(c, h);	/* asserts h->lock held; clears h->s */
+}
+
 void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob)
 {
 	struct ec_stripe_new *s = ob->ec;
@@ -1707,27 +1715,12 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
 	return 0;
 }
 
-static struct ec_stripe_head *
-ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
-			 unsigned algo, unsigned redundancy,
-			 enum bch_watermark watermark)
+static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h)
 {
-	struct ec_stripe_head *h;
-
-	h = kzalloc(sizeof(*h), GFP_KERNEL);
-	if (!h)
-		return NULL;
-
-	mutex_init(&h->lock);
-	BUG_ON(!mutex_trylock(&h->lock));
-
-	h->disk_label	= disk_label;
-	h->algo		= algo;
-	h->redundancy	= redundancy;
-	h->watermark	= watermark;
-
 	rcu_read_lock();
-	h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0);
+	h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label
+				 ? group_to_target(h->disk_label - 1)
+				 : 0);
 	unsigned nr_devs = dev_mask_nr(&h->devs);
 
 	for_each_member_device_rcu(c, ca, &h->devs)
@@ -1737,6 +1730,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
 
 	h->blocksize = pick_blocksize(c, &h->devs);
 
+	h->nr_active_devs = 0;
 	for_each_member_device_rcu(c, ca, &h->devs)
 		if (ca->mi.bucket_size == h->blocksize)
 			h->nr_active_devs++;
@@ -1747,7 +1741,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
 	 * If we only have redundancy + 1 devices, we're better off with just
 	 * replication:
 	 */
-	if (h->nr_active_devs < h->redundancy + 2) {
+	h->insufficient_devs = h->nr_active_devs < h->redundancy + 2;
+
+	if (h->insufficient_devs) {
 		const char *err;
 
 		if (nr_devs < h->redundancy + 2)
@@ -1762,6 +1758,31 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
 				h->nr_active_devs, h->redundancy + 2, err);
 	}
 
+	if (h->s && !h->s->allocated)
+		ec_stripe_new_cancel(c, h, -EINTR);
+
+	h->rw_devs_change_count = c->rw_devs_change_count;
+}
+
+static struct ec_stripe_head *
+ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label,
+			 unsigned algo, unsigned redundancy,
+			 enum bch_watermark watermark)
+{
+	struct ec_stripe_head *h;
+
+	h = kzalloc(sizeof(*h), GFP_KERNEL);
+	if (!h)
+		return NULL;
+
+	mutex_init(&h->lock);
+	BUG_ON(!mutex_trylock(&h->lock));	/* head is returned with h->lock held */
+
+	h->disk_label	= disk_label;
+	h->algo		= algo;
+	h->redundancy	= redundancy;
+	h->watermark	= watermark;
+	/* devs/blocksize/nr_active_devs left zeroed for ec_stripe_head_devs_update() */
 	list_add(&h->list, &c->ec_stripe_head_list);
 	return h;
 }
@@ -1772,7 +1793,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
 	    h->s->allocated &&
 	    bitmap_weight(h->s->blocks_allocated,
 			  h->s->nr_data) == h->s->nr_data)
-		ec_stripe_set_pending(c, h);
+		ec_stripe_new_set_pending(c, h);
 
 	mutex_unlock(&h->lock);
 }
@@ -1797,7 +1818,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 
 	if (test_bit(BCH_FS_going_ro, &c->flags)) {
 		h = ERR_PTR(-BCH_ERR_erofs_no_writes);
-		goto found;
+		goto err;
 	}
 
 	list_for_each_entry(h, &c->ec_stripe_head_list, list)
@@ -1806,18 +1827,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans,
 		    h->redundancy	== redundancy &&
 		    h->watermark	== watermark) {
 			ret = bch2_trans_mutex_lock(trans, &h->lock);
-			if (ret)
+			if (ret) {
 				h = ERR_PTR(ret);
+				goto err;
+			}
 			goto found;
 		}
 
 	h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark);
 found:
-	if (!IS_ERR_OR_NULL(h) &&
-	    h->nr_active_devs < h->redundancy + 2) {
+	/* h is NULL here if ec_new_stripe_head_alloc() failed: don't deref */
+	if (h && h->rw_devs_change_count != c->rw_devs_change_count)
+		ec_stripe_head_devs_update(c, h);
+	if (h && h->insufficient_devs) {
 		mutex_unlock(&h->lock);
 		h = NULL;
 	}
+err:
 	mutex_unlock(&c->ec_stripe_head_lock);
 	return h;
 }
@@ -2267,8 +2293,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca)
 		}
 		goto unlock;
 found:
-		h->s->err = -BCH_ERR_erofs_no_writes;
-		ec_stripe_set_pending(c, h);
+		ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes);
 unlock:
 		mutex_unlock(&h->lock);
 	}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 9520844c1b0c..05b812c1e49b 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -193,6 +193,9 @@ struct ec_stripe_head {
 	unsigned		algo;
 	unsigned		redundancy;
 	enum bch_watermark	watermark;
+	bool			insufficient_devs;
+
+	unsigned long		rw_devs_change_count;
 
 	u64			nr_created;