diff --git a/.bcachefs_revision b/.bcachefs_revision index 939980743..d9447c3f3 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -c3e4d892b77b9361c88854b0192223f68947b6b0 +eb83f1f842bb95e1f61489b48854240777368763 diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 01b29c850..ad4ad795b 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -1723,25 +1723,28 @@ void bch2_do_discards(struct bch_fs *c) static int invalidate_one_bucket(struct btree_trans *trans, struct btree_iter *lru_iter, struct bkey_s_c lru_k, - struct bpos *last_flushed_pos, s64 *nr_to_invalidate) { struct bch_fs *c = trans->c; + struct btree_iter alloc_iter = { NULL }; + struct bkey_i_alloc_v4 *a = NULL; + struct printbuf buf = PRINTBUF; + struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); + unsigned cached_sectors; int ret = 0; if (*nr_to_invalidate <= 0) return 1; - ret = bch2_check_lru_key(trans, lru_iter, lru_k, last_flushed_pos); - if (ret) - return ret < 0 ? ret : 0; + if (!bch2_dev_bucket_exists(c, bucket)) { + prt_str(&buf, "lru entry points to invalid bucket"); + goto err; + } - struct bpos bucket = u64_to_bucket(lru_k.k->p.offset); if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset)) return 0; - struct btree_iter alloc_iter; - struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket); + a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket); ret = PTR_ERR_OR_ZERO(a); if (ret) goto out; @@ -1755,7 +1758,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, if (!a->v.cached_sectors) bch_err(c, "invalidating empty bucket, confused"); - unsigned cached_sectors = a->v.cached_sectors; + cached_sectors = a->v.cached_sectors; SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); a->v.gen++; @@ -1777,7 +1780,28 @@ static int invalidate_one_bucket(struct btree_trans *trans, --*nr_to_invalidate; out: bch2_trans_iter_exit(trans, &alloc_iter); + printbuf_exit(&buf); return ret; +err: + prt_str(&buf, "\n lru key: "); + bch2_bkey_val_to_text(&buf, c, lru_k); + + prt_str(&buf, "\n lru entry: "); + bch2_lru_pos_to_text(&buf, lru_iter->pos); + + prt_str(&buf, "\n alloc key: "); + if (!a) + bch2_bpos_to_text(&buf, bucket); + else + bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); + + bch_err(c, "%s", buf.buf); + if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) { + bch2_inconsistent_error(c); + ret = -EINVAL; + } + + goto out; } static void bch2_do_invalidates_work(struct work_struct *work) @@ -1787,7 +1811,6 @@ static void bch2_do_invalidates_work(struct work_struct *work) struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; struct bkey_s_c k; - struct bpos last_flushed_pos = POS_MIN; unsigned i; int ret = 0; @@ -1803,8 +1826,7 @@ static void bch2_do_invalidates_work(struct work_struct *work) lru_pos(ca->dev_idx, 0, 0), lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX), BTREE_ITER_INTENT, k, - invalidate_one_bucket(trans, &iter, k, &last_flushed_pos, - &nr_to_invalidate)); + invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate)); if (ret < 0) { percpu_ref_put(&ca->ref); diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 12820acbc..2f194f86c 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -441,11 +441,13 @@ static int check_bp_exists(struct btree_trans *trans, memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { if (last_flushed->level != bp.level || !bpos_eq(last_flushed->pos, orig_k.k->p)) { + ret = bch2_btree_write_buffer_flush_sync(trans); + if (ret) + goto err; + last_flushed->level = bp.level; last_flushed->pos = orig_k.k->p; - - ret = bch2_btree_write_buffer_flush_sync(trans) ?: - -BCH_ERR_transaction_restart_write_buffer_flush; + ret = -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } goto missing; @@ -614,10 +616,10 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, struct btree_iter iter; enum btree_id btree_id; struct bkey_s_c k; - struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; int ret = 0; for (btree_id = 0; btree_id < btree_id_nr_alive(c); btree_id++) { + struct bpos_level last_flushed = { UINT_MAX, POS_MIN }; int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; ret = commit_do(trans, NULL, NULL, @@ -632,17 +634,24 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, BTREE_ITER_PREFETCH); - for_each_btree_key_continue(trans, iter, BTREE_ITER_PREFETCH, k, ret) { - ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc, + while (1) { + bch2_trans_begin(trans); + k = bch2_btree_iter_peek(&iter); + if (!k.k) + break; + ret = bkey_err(k) ?: check_extent_to_backpointers(trans, btree_id, level, bucket_start, bucket_end, - &last_flushed, k)); + &last_flushed, k) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc); + if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { + ret = 0; + continue; + } if (ret) break; - - if (bpos_eq(iter.pos, SPOS_MAX)) - break; + bch2_btree_iter_advance(&iter); } bch2_trans_iter_exit(trans, &iter); diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c index 82c08a987..e6d081c05 100644 --- a/libbcachefs/lru.c +++ b/libbcachefs/lru.c @@ -40,8 +40,8 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru) u64_to_bucket(lru.offset).offset); } -static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, - u64 dev_bucket, u64 time, bool set) +static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, + u64 dev_bucket, u64 time, bool set) { return time ? bch2_btree_bit_mod(trans, BTREE_ID_lru, @@ -51,12 +51,12 @@ static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id, int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) { - return __bch2_lru_set(trans, lru_id, dev_bucket, time, false); + return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted); } int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time) { - return __bch2_lru_set(trans, lru_id, dev_bucket, time, true); + return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set); } int bch2_lru_change(struct btree_trans *trans, @@ -66,8 +66,8 @@ int bch2_lru_change(struct btree_trans *trans, if (old_time == new_time) return 0; - return __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?: - __bch2_lru_set(trans, lru_id, dev_bucket, new_time, true); + return bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?: + bch2_lru_set(trans, lru_id, dev_bucket, new_time); } static const char * const bch2_lru_types[] = { @@ -77,11 +77,10 @@ static const char * const bch2_lru_types[] = { NULL }; -/* Returns 1 if key has been deleted */ -int bch2_check_lru_key(struct btree_trans *trans, - struct btree_iter *lru_iter, - struct bkey_s_c lru_k, - struct bpos *last_flushed_pos) +static int bch2_check_lru_key(struct btree_trans *trans, + struct btree_iter *lru_iter, + struct bkey_s_c lru_k, + struct bpos *last_flushed_pos) { struct bch_fs *c = trans->c; struct btree_iter iter; @@ -90,6 +89,7 @@ int bch2_check_lru_key(struct btree_trans *trans, const struct bch_alloc_v4 *a; struct printbuf buf1 = PRINTBUF; struct printbuf buf2 = PRINTBUF; + enum bch_lru_type type = lru_type(lru_k); struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset); u64 idx; int ret; @@ -98,7 +98,7 @@ int bch2_check_lru_key(struct btree_trans *trans, lru_entry_to_invalid_bucket, "lru key points to nonexistent device:bucket %llu:%llu", alloc_pos.inode, alloc_pos.offset)) - goto delete; + return bch2_btree_delete_at(trans, lru_iter, 0); k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0); ret = bkey_err(k); @@ -107,7 +107,6 @@ int bch2_check_lru_key(struct btree_trans *trans, a = bch2_alloc_to_v4(k, &a_convert); - enum bch_lru_type type = lru_type(lru_k); switch (type) { case BCH_LRU_read: idx = alloc_lru_idx_read(*a); @@ -115,34 +114,27 @@ int bch2_check_lru_key(struct btree_trans *trans, case BCH_LRU_fragmentation: idx = a->fragmentation_lru; break; - default: - /* unknown LRU type, don't check: */ - goto out; } if (lru_k.k->type != KEY_TYPE_set || lru_pos_time(lru_k.k->p) != idx) { if (!bpos_eq(*last_flushed_pos, lru_k.k->p)) { - ret = bch2_btree_write_buffer_flush_sync(trans); - if (!ret) { - *last_flushed_pos = lru_k.k->p; - ret = -BCH_ERR_transaction_restart_write_buffer_flush; - } + *last_flushed_pos = lru_k.k->p; + ret = bch2_btree_write_buffer_flush_sync(trans) ?: + -BCH_ERR_transaction_restart_write_buffer_flush; goto out; } - if ((c->opts.reconstruct_alloc && - c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_lrus) || + if (c->opts.reconstruct_alloc || fsck_err(c, lru_entry_bad, "incorrect lru entry: lru %s time %llu\n" " %s\n" - "for\n" - " %s", + " for %s", bch2_lru_types[type], lru_pos_time(lru_k.k->p), (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf), (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf))) - goto delete; + ret = bch2_btree_delete_at(trans, lru_iter, 0); } out: err: @@ -151,14 +143,6 @@ int bch2_check_lru_key(struct btree_trans *trans, printbuf_exit(&buf2); printbuf_exit(&buf1); return ret; -delete: - ret = bch2_btree_delete_at(trans, lru_iter, 0) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_WATERMARK_btree| - BCH_TRANS_COMMIT_lazy_rw| - BCH_TRANS_COMMIT_no_enospc) ?: - 1; - goto out; } int bch2_check_lrus(struct bch_fs *c) @@ -166,14 +150,15 @@ int bch2_check_lrus(struct bch_fs *c) struct btree_iter iter; struct bkey_s_c k; struct bpos last_flushed_pos = POS_MIN; + int ret = 0; - int ret = bch2_trans_run(c, - for_each_btree_key2(trans, iter, - BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ({ - int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos); - - ret2 < 0 ? ret2 : 0; - }))); - bch_err_fn(c, ret); + ret = bch2_trans_run(c, + for_each_btree_key_commit(trans, iter, + BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + bch2_check_lru_key(trans, &iter, k, &last_flushed_pos))); + if (ret) + bch_err_fn(c, ret); return ret; + } diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h index 014dba7cd..429dca816 100644 --- a/libbcachefs/lru.h +++ b/libbcachefs/lru.h @@ -64,8 +64,6 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64); int bch2_lru_set(struct btree_trans *, u16, u64, u64); int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64); -int bch2_check_lru_key(struct btree_trans *, struct btree_iter *, - struct bkey_s_c, struct bpos *); int bch2_check_lrus(struct bch_fs *); #endif /* _BCACHEFS_LRU_H */ diff --git a/libbcachefs/move.c b/libbcachefs/move.c index 3efe6a6e1..5ed9f53b8 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -159,7 +159,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt) atomic_read(&ctxt->write_sectors) != sectors_pending); } -static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) +void bch2_moving_ctxt_flush_all(struct moving_context *ctxt) { move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads)); bch2_trans_unlock_long(ctxt->trans); @@ -635,7 +635,7 @@ int bch2_move_data(struct bch_fs *c, return ret; } -int __bch2_evacuate_bucket(struct moving_context *ctxt, +int bch2_evacuate_bucket(struct moving_context *ctxt, struct move_bucket_in_flight *bucket_in_flight, struct bpos bucket, int gen, struct data_update_opts _data_opts) @@ -794,24 +794,6 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, return ret; } -int bch2_evacuate_bucket(struct bch_fs *c, - struct bpos bucket, int gen, - struct data_update_opts data_opts, - struct bch_ratelimit *rate, - struct bch_move_stats *stats, - struct write_point_specifier wp, - bool wait_on_copygc) -{ - struct moving_context ctxt; - int ret; - - bch2_moving_ctxt_init(&ctxt, c, rate, stats, wp, wait_on_copygc); - ret = __bch2_evacuate_bucket(&ctxt, NULL, bucket, gen, data_opts); - bch2_moving_ctxt_exit(&ctxt); - - return ret; -} - typedef bool (*move_btree_pred)(struct bch_fs *, void *, struct btree *, struct bch_io_opts *, struct data_update_opts *); diff --git a/libbcachefs/move.h b/libbcachefs/move.h index 531965674..9baf3093a 100644 --- a/libbcachefs/move.h +++ b/libbcachefs/move.h @@ -83,6 +83,7 @@ void bch2_moving_ctxt_init(struct moving_context *, struct bch_fs *, struct write_point_specifier, bool); struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *); void bch2_moving_ctxt_do_pending_writes(struct moving_context *); +void bch2_moving_ctxt_flush_all(struct moving_context *); void bch2_move_ctxt_wait_for_io(struct moving_context *); int bch2_move_ratelimit(struct moving_context *); @@ -135,16 +136,10 @@ int bch2_move_data(struct bch_fs *, bool, move_pred_fn, void *); -int __bch2_evacuate_bucket(struct moving_context *, +int bch2_evacuate_bucket(struct moving_context *, struct move_bucket_in_flight *, struct bpos, int, struct data_update_opts); -int bch2_evacuate_bucket(struct bch_fs *, struct bpos, int, - struct data_update_opts, - struct bch_ratelimit *, - struct bch_move_stats *, - struct write_point_specifier, - bool); int bch2_data_job(struct bch_fs *, struct bch_move_stats *, struct bch_ioctl_data); diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index 794e4c56e..7155e2060 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -149,7 +149,6 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, struct bkey_s_c k; size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4); size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0; - struct bpos last_flushed_pos = POS_MIN; int ret; move_buckets_wait(ctxt, buckets_in_flight, false); @@ -166,16 +165,11 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0), lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX), 0, k, ({ - int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos); - if (ret2) { - ret2 = ret2 < 0 ? ret2 : 0; - goto next; - } + struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) }; + int ret2 = 0; saw++; - struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) }; - if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p))) not_movable++; else if (bucket_in_flight(buckets_in_flight, b.k)) @@ -185,7 +179,6 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt, if (ret2 >= 0) sectors += b.sectors; } -next: ret2; })); @@ -231,7 +224,7 @@ static int bch2_copygc(struct moving_context *ctxt, break; } - ret = __bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, + ret = bch2_evacuate_bucket(ctxt, f, f->bucket.k.bucket, f->bucket.k.gen, data_opts); if (ret) goto err; @@ -344,7 +337,8 @@ static int bch2_copygc_thread(void *arg) if (!c->copy_gc_enabled) { move_buckets_wait(&ctxt, buckets, true); - kthread_wait_freezable(c->copy_gc_enabled); + kthread_wait_freezable(c->copy_gc_enabled || + kthread_should_stop()); } if (unlikely(freezing(current))) { diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c index fefa4395f..79bd4ad01 100644 --- a/libbcachefs/rebalance.c +++ b/libbcachefs/rebalance.c @@ -331,8 +331,16 @@ static int do_rebalance(struct moving_context *ctxt) BTREE_ID_rebalance_work, POS_MIN, BTREE_ITER_ALL_SNAPSHOTS); - while (!bch2_move_ratelimit(ctxt) && - !kthread_wait_freezable(r->enabled)) { + while (!bch2_move_ratelimit(ctxt)) { + if (!r->enabled) { + bch2_moving_ctxt_flush_all(ctxt); + kthread_wait_freezable(c->copy_gc_enabled || + kthread_should_stop()); + } + + if (kthread_should_stop()) + break; + bch2_trans_begin(trans); ret = bkey_err(k = next_rebalance_entry(trans, &rebalance_work_iter));