From c5095f41ea1d3e74349ecdcbddcd3813b9d07007 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sat, 8 Jun 2024 17:49:11 -0400
Subject: [PATCH] bcachefs: Simplify btree key cache fill path

Don't allocate the new bkey_cached until after we've done the btree
lookup; this means we can kill bkey_cached.valid.

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/btree_iter.c      |   9 +-
 fs/bcachefs/btree_key_cache.c | 301 ++++++++++++----------------------
 fs/bcachefs/btree_types.h     |   1 -
 3 files changed, 112 insertions(+), 199 deletions(-)

diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index c68cc7149dbb5..a4769c190b8fe 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1801,13 +1801,12 @@ struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct bkey *
 			goto hole;
 	} else {
 		struct bkey_cached *ck = (void *) path->l[0].b;
-
-		EBUG_ON(ck &&
-			(path->btree_id != ck->key.btree_id ||
-			 !bkey_eq(path->pos, ck->key.pos)));
-		if (!ck || !ck->valid)
+		if (!ck)
 			return bkey_s_c_null;
 
+		EBUG_ON(path->btree_id != ck->key.btree_id ||
+			!bkey_eq(path->pos, ck->key.pos));
+
 		*u = ck->k->k;
 		k = bkey_i_to_s_c(ck->k);
 	}
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 88ccf8dbb5a96..7bfcb3abb482f 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -326,71 +326,112 @@ bkey_cached_reuse(struct btree_key_cache *c)
 	return ck;
 }
 
-static struct bkey_cached *
-btree_key_cache_create(struct btree_trans *trans, struct btree_path *path)
+static int btree_key_cache_create(struct btree_trans *trans, struct btree_path *path,
+				  struct bkey_s_c k)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_key_cache *bc = &c->btree_key_cache;
-	struct bkey_cached *ck;
-	bool was_new = false;
 
-	ck = bkey_cached_alloc(trans, path, &was_new);
-	if (IS_ERR(ck))
-		return ck;
+	bool was_new = false;
+	struct bkey_cached *ck = bkey_cached_alloc(trans, path, &was_new);
+	int ret = PTR_ERR_OR_ZERO(ck);
+	if (ret)
+		return ret;
 
 	if (unlikely(!ck)) {
 		ck = bkey_cached_reuse(bc);
 		if (unlikely(!ck)) {
 			bch_err(c, "error allocating memory for key cache item, btree %s",
 				bch2_btree_id_str(path->btree_id));
-			return ERR_PTR(-BCH_ERR_ENOMEM_btree_key_cache_create);
+			return -BCH_ERR_ENOMEM_btree_key_cache_create;
 		}
-
-		mark_btree_node_locked(trans, path, 0, BTREE_NODE_INTENT_LOCKED);
 	}
 
 	ck->c.level		= 0;
 	ck->c.btree_id		= path->btree_id;
 	ck->key.btree_id	= path->btree_id;
 	ck->key.pos		= path->pos;
-	ck->valid		= false;
 	ck->flags		= 1U << BKEY_CACHED_ACCESSED;
 
-	if (unlikely(rhashtable_lookup_insert_fast(&bc->table,
-					  &ck->hash,
-					  bch2_btree_key_cache_params))) {
-		/* We raced with another fill: */
+	/*
+	 * bch2_varint_decode can read past the end of the buffer by at
+	 * most 7 bytes (it won't be used):
+	 */
+	unsigned new_u64s = k.k->u64s + 1;
+
+	/*
+	 * Allocate some extra space so that the transaction commit path is less
+	 * likely to have to reallocate, since that requires a transaction
+	 * restart:
+	 */
+	new_u64s = min(256U, (new_u64s * 3) / 2);
+	new_u64s = roundup_pow_of_two(new_u64s);
 
-		if (likely(was_new)) {
-			six_unlock_write(&ck->c.lock);
-			six_unlock_intent(&ck->c.lock);
-			kfree(ck);
+	if (new_u64s > ck->u64s) {
+		struct bkey_i *new_k = allocate_dropping_locks(trans, ret,
+				kmalloc(new_u64s * sizeof(u64), _gfp));
+		if (unlikely(!new_k)) {
+			bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
+				bch2_btree_id_str(ck->key.btree_id), new_u64s);
+			ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
 		} else {
-			bkey_cached_free_fast(bc, ck);
+			/*
+			 * it's safe to use a successful allocation even if we
+			 * got a transaction restart since this item's lock
+			 * isn't yet tracked by the btree_trans
+			 */
+			kfree(ck->k);
+			ck->k		= new_k;
+			ck->u64s	= new_u64s;
 		}
 
-		mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED);
-		return NULL;
+		if (unlikely(ret))
+			goto err;
 	}
 
-	atomic_long_inc(&bc->nr_keys);
+	bkey_reassemble(ck->k, k);
+
+	ret = rhashtable_lookup_insert_fast(&bc->table, &ck->hash, bch2_btree_key_cache_params);
+	if (unlikely(ret)) /* raced with another fill? */
+		goto err;
 
+	atomic_long_inc(&bc->nr_keys);
 	six_unlock_write(&ck->c.lock);
 
-	return ck;
+	enum six_lock_type lock_want = __btree_lock_want(path, 0);
+	if (lock_want == SIX_LOCK_read)
+		six_lock_downgrade(&ck->c.lock);
+	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
+	path->uptodate = BTREE_ITER_UPTODATE;
+	return 0;
+err:
+	if (likely(was_new)) {
+		six_unlock_write(&ck->c.lock);
+		six_unlock_intent(&ck->c.lock);
+		kfree(ck);
+	} else {
+		bkey_cached_free_fast(bc, ck);
+		mark_btree_node_locked_noreset(path, 0, BTREE_NODE_UNLOCKED);
+	}
+
+	return ret;
 }
 
-static int btree_key_cache_fill(struct btree_trans *trans,
-				struct btree_path *ck_path,
-				struct bkey_cached *ck)
+static noinline int btree_key_cache_fill(struct btree_trans *trans,
+					 struct btree_path *ck_path,
+					 unsigned flags)
 {
+	if (flags & BTREE_ITER_cached_nofill) {
+		ck_path->uptodate = BTREE_ITER_UPTODATE;
+		return 0;
+	}
+
+	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	unsigned new_u64s = 0;
-	struct bkey_i *new_k = NULL;
 	int ret;
 
-	bch2_trans_iter_init(trans, &iter, ck->key.btree_id, ck->key.pos,
+	bch2_trans_iter_init(trans, &iter, ck_path->btree_id, ck_path->pos,
 			     BTREE_ITER_key_cache_fill|
 			     BTREE_ITER_cached_nofill);
 	iter.flags &= ~BTREE_ITER_with_journal;
@@ -399,70 +440,15 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	if (ret)
 		goto err;
 
-	if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-		trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-		ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
-		goto err;
-	}
-
-	/*
-	 * bch2_varint_decode can read past the end of the buffer by at
-	 * most 7 bytes (it won't be used):
-	 */
-	new_u64s	= k.k->u64s + 1;
-
-	/*
-	 * Allocate some extra space so that the transaction commit path is less
-	 * likely to have to reallocate, since that requires a transaction
-	 * restart:
-	 */
-	new_u64s	= min(256U, (new_u64s * 3) / 2);
-
-	if (new_u64s > ck->u64s) {
-		new_u64s = roundup_pow_of_two(new_u64s);
-		new_k = kmalloc(new_u64s * sizeof(u64), GFP_NOWAIT|__GFP_NOWARN);
-		if (!new_k) {
-			bch2_trans_unlock(trans);
-
-			new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
-			if (!new_k) {
-				bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
-					bch2_btree_id_str(ck->key.btree_id), new_u64s);
-				ret = -BCH_ERR_ENOMEM_btree_key_cache_fill;
-				goto err;
-			}
-
-			ret = bch2_trans_relock(trans);
-			if (ret) {
-				kfree(new_k);
-				goto err;
-			}
-
-			if (!bch2_btree_node_relock(trans, ck_path, 0)) {
-				kfree(new_k);
-				trace_and_count(trans->c, trans_restart_relock_key_cache_fill, trans, _THIS_IP_, ck_path);
-				ret = btree_trans_restart(trans, BCH_ERR_transaction_restart_key_cache_fill);
-				goto err;
-			}
-		}
-	}
+	/* Recheck after btree lookup, before allocating: */
+	ret = bch2_btree_key_cache_find(c, ck_path->btree_id, ck_path->pos) ? -EEXIST : 0;
+	if (unlikely(ret))
+		goto out;
 
-	ret = bch2_btree_node_lock_write(trans, ck_path, &ck_path->l[0].b->c);
-	if (ret) {
-		kfree(new_k);
+	ret = btree_key_cache_create(trans, ck_path, k);
+	if (ret)
 		goto err;
-	}
-
-	if (new_k) {
-		kfree(ck->k);
-		ck->u64s = new_u64s;
-		ck->k = new_k;
-	}
-
-	bkey_reassemble(ck->k, k);
-	ck->valid = true;
-	bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
-
+out:
 	/* We're not likely to need this iterator again: */
 	bch2_set_btree_iter_dontneed(&iter);
 err:
@@ -470,128 +456,62 @@ static int btree_key_cache_fill(struct btree_trans *trans,
 	return ret;
 }
 
-static noinline int
-bch2_btree_path_traverse_cached_slowpath(struct btree_trans *trans, struct btree_path *path,
-					 unsigned flags)
+static inline int btree_path_traverse_cached_fast(struct btree_trans *trans,
+						  struct btree_path *path)
 {
 	struct bch_fs *c = trans->c;
 	struct bkey_cached *ck;
-	int ret = 0;
-
-	BUG_ON(path->level);
-
-	path->l[1].b = NULL;
-
-	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-		ck = (void *) path->l[0].b;
-		goto fill;
-	}
 retry:
 	ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
-	if (!ck) {
-		ck = btree_key_cache_create(trans, path);
-		ret = PTR_ERR_OR_ZERO(ck);
-		if (ret)
-			goto err;
-		if (!ck)
-			goto retry;
-
-		btree_path_cached_set(trans, path, ck, BTREE_NODE_INTENT_LOCKED);
-		path->locks_want = 1;
-	} else {
-		enum six_lock_type lock_want = __btree_lock_want(path, 0);
-
-		ret = btree_node_lock(trans, path, (void *) ck, 0,
-				      lock_want, _THIS_IP_);
-		if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-			goto err;
-
-		BUG_ON(ret);
-
-		if (ck->key.btree_id != path->btree_id ||
-		    !bpos_eq(ck->key.pos, path->pos)) {
-			six_unlock_type(&ck->c.lock, lock_want);
-			goto retry;
-		}
+	if (!ck)
+		return -ENOENT;
 
-		btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
-	}
-fill:
-	path->uptodate = BTREE_ITER_UPTODATE;
+	enum six_lock_type lock_want = __btree_lock_want(path, 0);
 
-	if (!ck->valid && !(flags & BTREE_ITER_cached_nofill)) {
-		ret = bch2_btree_path_upgrade(trans, path, 1) ?:
-			btree_key_cache_fill(trans, path, ck) ?:
-			bch2_btree_path_relock(trans, path, _THIS_IP_);
-		if (ret)
-			goto err;
+	int ret = btree_node_lock(trans, path, (void *) ck, 0, lock_want, _THIS_IP_);
+	if (ret)
+		return ret;
 
-		path->uptodate = BTREE_ITER_UPTODATE;
+	if (ck->key.btree_id != path->btree_id ||
+	    !bpos_eq(ck->key.pos, path->pos)) {
+		six_unlock_type(&ck->c.lock, lock_want);
+		goto retry;
 	}
 
 	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
 		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
 
-	BUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
-	BUG_ON(path->uptodate);
-
-	return ret;
-err:
-	path->uptodate = BTREE_ITER_NEED_TRAVERSE;
-	if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-		btree_node_unlock(trans, path, 0);
-		path->l[0].b = ERR_PTR(ret);
-	}
-	return ret;
+	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
+	path->uptodate = BTREE_ITER_UPTODATE;
+	return 0;
 }
 
 int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path *path,
 				    unsigned flags)
 {
-	struct bch_fs *c = trans->c;
-	struct bkey_cached *ck;
-	int ret = 0;
-
 	EBUG_ON(path->level);
 
 	path->l[1].b = NULL;
 
 	if (bch2_btree_node_relock_notrace(trans, path, 0)) {
-		ck = (void *) path->l[0].b;
-		goto fill;
+		path->uptodate = BTREE_ITER_UPTODATE;
+		return 0;
 	}
-retry:
-	ck = bch2_btree_key_cache_find(c, path->btree_id, path->pos);
-	if (!ck)
-		return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
-
-	enum six_lock_type lock_want = __btree_lock_want(path, 0);
-
-	ret = btree_node_lock(trans, path, (void *) ck, 0,
-			      lock_want, _THIS_IP_);
-	EBUG_ON(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart));
-
-	if (ret)
-		return ret;
 
-	if (ck->key.btree_id != path->btree_id ||
-	    !bpos_eq(ck->key.pos, path->pos)) {
-		six_unlock_type(&ck->c.lock, lock_want);
-		goto retry;
+	int ret;
+	do {
+		ret = btree_path_traverse_cached_fast(trans, path);
+		if (unlikely(ret == -ENOENT))
+			ret = btree_key_cache_fill(trans, path, flags);
+	} while (ret == -EEXIST);
+
+	if (unlikely(ret)) {
+		path->uptodate = BTREE_ITER_NEED_TRAVERSE;
+		if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
+			btree_node_unlock(trans, path, 0);
+			path->l[0].b = ERR_PTR(ret);
+		}
 	}
-
-	btree_path_cached_set(trans, path, ck, (enum btree_node_locked_type) lock_want);
-fill:
-	if (!ck->valid)
-		return bch2_btree_path_traverse_cached_slowpath(trans, path, flags);
-
-	if (!test_bit(BKEY_CACHED_ACCESSED, &ck->flags))
-		set_bit(BKEY_CACHED_ACCESSED, &ck->flags);
-
-	path->uptodate = BTREE_ITER_UPTODATE;
-	EBUG_ON(!ck->valid);
-	EBUG_ON(btree_node_locked_type(path, 0) != btree_lock_want(path, 0));
-
 	return ret;
 }
 
@@ -630,8 +550,6 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
 		goto out;
 	}
 
-	BUG_ON(!ck->valid);
-
 	if (journal_seq && ck->journal.seq != journal_seq)
 		goto out;
 
@@ -753,7 +671,6 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 	BUG_ON(insert->k.u64s > ck->u64s);
 
 	bkey_copy(ck->k, insert);
-	ck->valid = true;
 
 	if (!test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 		EBUG_ON(test_bit(BCH_FS_clean_shutdown, &c->flags));
@@ -795,8 +712,6 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans,
 	struct btree_key_cache *bc = &c->btree_key_cache;
 	struct bkey_cached *ck = (void *) path->l[0].b;
 
-	BUG_ON(!ck->valid);
-
 	/*
 	 * We just did an update to the btree, bypassing the key cache: the key
 	 * cache key is now stale and must be dropped, even if dirty:
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index e7a78ef6273d3..c9c9864a87b0c 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -388,7 +388,6 @@ struct bkey_cached {
 	unsigned long		flags;
 	unsigned long		btree_trans_barrier_seq;
 	u16			u64s;
-	bool			valid;
 	struct bkey_cached_key	key;
 
 	struct rhash_head	hash;
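
The retry protocol in the new bch2_btree_path_traverse_cached() is the core
of the patch: the fast path returns -ENOENT when there's no cached key, the
fill path returns -EEXIST when it loses the race to insert one, and the loop
runs until one side wins. A standalone sketch of the same pattern, with
hypothetical types and helpers standing in for the bcachefs ones (this
compiles and runs on its own; it is not the kernel code):

	#include <errno.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct cache {
		bool filled;	/* stands in for "entry present in the hash table" */
		int  value;
	};

	/* Fast path: succeeds only if the entry is already cached. */
	static int traverse_cached_fast(struct cache *c)
	{
		return c->filled ? 0 : -ENOENT;
	}

	/*
	 * Fill path: does the expensive backing lookup, then installs the
	 * entry.  If another thread installed it first, return -EEXIST so
	 * the caller retries the fast path and locks *that* entry instead.
	 */
	static int cache_fill(struct cache *c)
	{
		if (c->filled)
			return -EEXIST;		/* raced with another fill */
		c->value  = 42;			/* pretend this came from the btree */
		c->filled = true;
		return 0;
	}

	static int traverse_cached(struct cache *c)
	{
		int ret;
		do {
			ret = traverse_cached_fast(c);
			if (ret == -ENOENT)
				ret = cache_fill(c);
		} while (ret == -EEXIST);
		return ret;
	}

	int main(void)
	{
		struct cache c = { 0 };
		printf("first traverse:  %d\n", traverse_cached(&c));	/* fill path */
		printf("second traverse: %d\n", traverse_cached(&c));	/* fast path */
		return 0;
	}

In the real code the -EEXIST comes from two places: the recheck of
bch2_btree_key_cache_find() after the btree lookup, and
rhashtable_lookup_insert_fast() failing in btree_key_cache_create(); both
mean the same thing -- another thread filled this slot first.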
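
A note on the buffer sizing now done in btree_key_cache_create(): the
allocation is k.k->u64s + 1 (the +1 because bch2_varint_decode may read, but
never use, up to 7 bytes past the end of the key), grown by 3/2 and capped
at 256 u64s so the transaction commit path rarely needs to reallocate, then
rounded up to a power of two. A worked sketch of that arithmetic, where
round_up_pow2() is a userspace stand-in for the kernel's
roundup_pow_of_two():

	#include <assert.h>

	static unsigned round_up_pow2(unsigned v)
	{
		unsigned r = 1;
		while (r < v)
			r <<= 1;
		return r;
	}

	static unsigned key_cache_alloc_u64s(unsigned key_u64s)
	{
		unsigned u64s  = key_u64s + 1;		/* varint over-read pad */
		unsigned grown = (u64s * 3) / 2;	/* slack for the commit path */

		u64s = grown < 256 ? grown : 256;	/* min(256U, ...) */
		return round_up_pow2(u64s);
	}

	int main(void)
	{
		assert(key_cache_alloc_u64s(6)   == 16);	/* 7 -> 10 -> 16 */
		assert(key_cache_alloc_u64s(100) == 256);	/* 101 -> 151 -> 256 */
		return 0;
	}

So small keys get a comfortably padded power-of-two buffer, and the
BUG_ON(insert->k.u64s > ck->u64s) in bch2_btree_insert_key_cached() has that
much more headroom before an update forces a reallocation and transaction
restart.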