From cf391af511ce085e75d659e824c4b4874fe58fab Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 6 Sep 2024 19:14:36 -0400 Subject: [PATCH] bcachefs: bch2_ec_stripe_head_get() now checks for change in rw devices This factors out ec_stripe_head_devs_update(), which initializes the bitmap of devices we're allocating from, and runs it every time c->rw_devs_change_count changes. We also cancel pending, not allocated stripes, since they may refer to devices that are no longer available. Signed-off-by: Kent Overstreet --- fs/bcachefs/ec.c | 81 +++++++++++++++++++++++++++++++----------------- fs/bcachefs/ec.h | 3 ++ 2 files changed, 56 insertions(+), 28 deletions(-) diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 78818d8c5279..fa549f4cabf2 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -1568,10 +1568,12 @@ void bch2_ec_do_stripe_creates(struct bch_fs *c) bch2_write_ref_put(c, BCH_WRITE_REF_stripe_create); } -static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) +static void ec_stripe_new_set_pending(struct bch_fs *c, struct ec_stripe_head *h) { struct ec_stripe_new *s = h->s; + lockdep_assert_held(&h->lock); + BUG_ON(!s->allocated && !s->err); h->s = NULL; @@ -1584,6 +1586,12 @@ static void ec_stripe_set_pending(struct bch_fs *c, struct ec_stripe_head *h) ec_stripe_new_put(c, s, STRIPE_REF_io); } +static void ec_stripe_new_cancel(struct bch_fs *c, struct ec_stripe_head *h, int err) +{ + h->s->err = err; + ec_stripe_new_set_pending(c, h); +} + void bch2_ec_bucket_cancel(struct bch_fs *c, struct open_bucket *ob) { struct ec_stripe_new *s = ob->ec; @@ -1707,27 +1715,12 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h) return 0; } -static struct ec_stripe_head * -ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, - unsigned algo, unsigned redundancy, - enum bch_watermark watermark) +static void ec_stripe_head_devs_update(struct bch_fs *c, struct ec_stripe_head *h) { - struct ec_stripe_head 
*h; - - h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - mutex_init(&h->lock); - BUG_ON(!mutex_trylock(&h->lock)); - - h->disk_label = disk_label; - h->algo = algo; - h->redundancy = redundancy; - h->watermark = watermark; - rcu_read_lock(); - h->devs = target_rw_devs(c, BCH_DATA_user, disk_label ? group_to_target(disk_label - 1) : 0); + h->devs = target_rw_devs(c, BCH_DATA_user, h->disk_label + ? group_to_target(h->disk_label - 1) + : 0); unsigned nr_devs = dev_mask_nr(&h->devs); for_each_member_device_rcu(c, ca, &h->devs) @@ -1737,6 +1730,7 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, h->blocksize = pick_blocksize(c, &h->devs); + h->nr_active_devs = 0; for_each_member_device_rcu(c, ca, &h->devs) if (ca->mi.bucket_size == h->blocksize) h->nr_active_devs++; @@ -1747,7 +1741,9 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, * If we only have redundancy + 1 devices, we're better off with just * replication: */ - if (h->nr_active_devs < h->redundancy + 2) { + h->insufficient_devs = h->nr_active_devs < h->redundancy + 2; + + if (h->insufficient_devs) { const char *err; if (nr_devs < h->redundancy + 2) @@ -1762,6 +1758,31 @@ ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, h->nr_active_devs, h->redundancy + 2, err); } + if (h->s && !h->s->allocated) + ec_stripe_new_cancel(c, h, -EINTR); + + h->rw_devs_change_count = c->rw_devs_change_count; +} + +static struct ec_stripe_head * +ec_new_stripe_head_alloc(struct bch_fs *c, unsigned disk_label, + unsigned algo, unsigned redundancy, + enum bch_watermark watermark) +{ + struct ec_stripe_head *h; + + h = kzalloc(sizeof(*h), GFP_KERNEL); + if (!h) + return NULL; + + mutex_init(&h->lock); + BUG_ON(!mutex_trylock(&h->lock)); + + h->disk_label = disk_label; + h->algo = algo; + h->redundancy = redundancy; + h->watermark = watermark; + list_add(&h->list, &c->ec_stripe_head_list); return h; } @@ -1772,7 +1793,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, 
struct ec_stripe_head *h) h->s->allocated && bitmap_weight(h->s->blocks_allocated, h->s->nr_data) == h->s->nr_data) - ec_stripe_set_pending(c, h); + ec_stripe_new_set_pending(c, h); mutex_unlock(&h->lock); } @@ -1797,7 +1818,7 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, if (test_bit(BCH_FS_going_ro, &c->flags)) { h = ERR_PTR(-BCH_ERR_erofs_no_writes); - goto found; + goto err; } list_for_each_entry(h, &c->ec_stripe_head_list, list) @@ -1806,18 +1827,23 @@ __bch2_ec_stripe_head_get(struct btree_trans *trans, h->redundancy == redundancy && h->watermark == watermark) { ret = bch2_trans_mutex_lock(trans, &h->lock); - if (ret) + if (ret) { h = ERR_PTR(ret); + goto err; + } goto found; } h = ec_new_stripe_head_alloc(c, disk_label, algo, redundancy, watermark); found: - if (!IS_ERR_OR_NULL(h) && - h->nr_active_devs < h->redundancy + 2) { + if (h->rw_devs_change_count != c->rw_devs_change_count) + ec_stripe_head_devs_update(c, h); + + if (h->insufficient_devs) { mutex_unlock(&h->lock); h = NULL; } +err: mutex_unlock(&c->ec_stripe_head_lock); return h; } @@ -2267,8 +2293,7 @@ static void __bch2_ec_stop(struct bch_fs *c, struct bch_dev *ca) } goto unlock; found: - h->s->err = -BCH_ERR_erofs_no_writes; - ec_stripe_set_pending(c, h); + ec_stripe_new_cancel(c, h, -BCH_ERR_erofs_no_writes); unlock: mutex_unlock(&h->lock); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 9520844c1b0c..05b812c1e49b 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -193,6 +193,9 @@ struct ec_stripe_head { unsigned algo; unsigned redundancy; enum bch_watermark watermark; + bool insufficient_devs; + + unsigned long rw_devs_change_count; u64 nr_created;