diff --git a/.bcachefs_revision b/.bcachefs_revision index e41bb4016..939980743 100644 --- a/.bcachefs_revision +++ b/.bcachefs_revision @@ -1 +1 @@ -feaca6edbd240bbd98d261097a97037c56a09eec +c3e4d892b77b9361c88854b0192223f68947b6b0 diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c index 56a18ace8..01b29c850 100644 --- a/libbcachefs/alloc_background.c +++ b/libbcachefs/alloc_background.c @@ -558,8 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); if (ret) break; @@ -578,8 +577,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) if (have_bucket_gens_key && !ret) ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0)); bch2_trans_put(trans); @@ -1276,7 +1274,7 @@ static noinline_for_stack int bch2_check_discard_freespace_key(struct btree_tran ret = bch2_btree_delete_extent_at(trans, iter, iter->btree_id == BTREE_ID_freespace ? 
1 : 0, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw); + BCH_TRANS_COMMIT_no_enospc); goto out; } @@ -1413,8 +1411,7 @@ int bch2_check_alloc_info(struct bch_fs *c) } ret = bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw); + BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; @@ -1472,7 +1469,7 @@ int bch2_check_alloc_info(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_bucket_gens, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_bucket_gens_key(trans, &iter, k)); err: bch2_trans_put(trans); @@ -1562,7 +1559,7 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c) int ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_alloc, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_alloc_to_lru_ref(trans, &iter))); bch_err_fn(c, ret); return ret; @@ -1674,7 +1671,7 @@ static int bch2_discard_one_bucket(struct btree_trans *trans, if (ret) goto out; - this_cpu_inc(c->counters[BCH_COUNTER_bucket_discard]); + count_event(c, bucket_discard); (*discarded)++; out: (*seen)++; @@ -1877,7 +1874,6 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_bucket_do_index(trans, k, a, true) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; @@ -1898,7 +1894,6 @@ int bch2_dev_freespace_init(struct bch_fs *c, struct bch_dev *ca, ret = bch2_btree_insert_trans(trans, BTREE_ID_freespace, freespace, 0) ?: bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc); if (ret) goto bkey_err; diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h index 72bb8491f..96671f166 100644 --- 
a/libbcachefs/alloc_background.h +++ b/libbcachefs/alloc_background.h @@ -84,9 +84,9 @@ static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a) static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca, struct bch_alloc_v4 a) { - unsigned d = bch2_bucket_sectors_dirty(a); + int d = bch2_bucket_sectors_dirty(a); - return d ? max(0U, ca->mi.bucket_size - d) : 0; + return d ? max(0, ca->mi.bucket_size - d) : 0; } static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c index 1ba0eeb75..a961df74d 100644 --- a/libbcachefs/alloc_foreground.c +++ b/libbcachefs/alloc_foreground.c @@ -697,11 +697,9 @@ static int add_new_bucket(struct bch_fs *c, bch_dev_bkey_exists(c, ob->dev)->mi.durability; BUG_ON(*nr_effective >= nr_replicas); - BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS); __clear_bit(ob->dev, devs_may_alloc->d); - *nr_effective += (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) - ? durability : 1; + *nr_effective += durability; *have_cache |= !durability; ob_push(c, ptrs, ob); diff --git a/libbcachefs/backpointers.c b/libbcachefs/backpointers.c index 8c66333bc..12820acbc 100644 --- a/libbcachefs/backpointers.c +++ b/libbcachefs/backpointers.c @@ -397,7 +397,7 @@ int bch2_check_btree_backpointers(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, 0, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_check_btree_backpointer(trans, &iter, k))); if (ret) bch_err_fn(c, ret); @@ -621,7 +621,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, int level, depth = btree_type_has_ptrs(btree_id) ? 
0 : 1; ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc, check_btree_root_to_backpointers(trans, btree_id, bucket_start, bucket_end, @@ -635,7 +634,6 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, BTREE_ITER_PREFETCH); for_each_btree_key_continue(trans, iter, BTREE_ITER_PREFETCH, k, ret) { ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| BCH_TRANS_COMMIT_no_enospc, check_extent_to_backpointers(trans, btree_id, level, bucket_start, bucket_end, @@ -810,7 +808,7 @@ static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_one_backpointer(trans, start, end, bkey_s_c_to_backpointer(k), &last_flushed_pos)); diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h index 2e9f4af3a..bb2a0cc43 100644 --- a/libbcachefs/bcachefs.h +++ b/libbcachefs/bcachefs.h @@ -223,9 +223,11 @@ #define race_fault(...) dynamic_fault("bcachefs:race") +#define count_event(_c, _name) this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]) + #define trace_and_count(_c, _name, ...) 
\ do { \ - this_cpu_inc((_c)->counters[BCH_COUNTER_##_name]); \ + count_event(_c, _name); \ trace_##_name(__VA_ARGS__); \ } while (0) diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h index 44ba7a87a..43822c172 100644 --- a/libbcachefs/bcachefs_ioctl.h +++ b/libbcachefs/bcachefs_ioctl.h @@ -276,7 +276,7 @@ struct bch_ioctl_fs_usage { __u32 replica_entries_bytes; __u32 pad; - struct bch_replicas_usage replicas[0]; + struct bch_replicas_usage replicas[]; }; /* @@ -313,7 +313,7 @@ struct bch_ioctl_dev_usage_v2 { __u32 bucket_size; __u64 nr_buckets; - struct bch_ioctl_dev_usage_type d[0]; + struct bch_ioctl_dev_usage_type d[]; }; /* diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c index 70e478807..d16156074 100644 --- a/libbcachefs/btree_gc.c +++ b/libbcachefs/btree_gc.c @@ -1541,8 +1541,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) rcu_assign_pointer(ca->buckets_gc, buckets); } - for_each_btree_key(trans, iter, BTREE_ID_alloc, POS_MIN, - BTREE_ITER_PREFETCH, k, ret) { + ret = for_each_btree_key2(trans, iter, BTREE_ID_alloc, POS_MIN, + BTREE_ITER_PREFETCH, k, ({ ca = bch_dev_bkey_exists(c, k.k->p.inode); g = gc_bucket(ca, k.k->p.offset); @@ -1561,8 +1561,9 @@ static int bch2_gc_alloc_start(struct bch_fs *c, bool metadata_only) g->stripe = a->stripe; g->stripe_redundancy = a->stripe_redundancy; } - } - bch2_trans_iter_exit(trans, &iter); + + 0; + })); err: bch2_trans_put(trans); if (ret) diff --git a/libbcachefs/btree_iter.c b/libbcachefs/btree_iter.c index bdc808087..f430ca837 100644 --- a/libbcachefs/btree_iter.c +++ b/libbcachefs/btree_iter.c @@ -1492,6 +1492,22 @@ static void bch2_trans_update_max_paths(struct btree_trans *trans) trans->nr_max_paths = hweight64(trans->paths_allocated); } +noinline __cold +int __bch2_btree_trans_too_many_iters(struct btree_trans *trans) +{ + if (trace_trans_restart_too_many_iters_enabled()) { + struct printbuf buf = PRINTBUF; + + bch2_trans_paths_to_text(&buf, trans); + 
trace_trans_restart_too_many_iters(trans, _THIS_IP_, buf.buf); + printbuf_exit(&buf); + } + + count_event(trans->c, trans_restart_too_many_iters); + + return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); +} + static noinline void btree_path_overflow(struct btree_trans *trans) { bch2_dump_trans_paths_updates(trans); @@ -3027,6 +3043,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) struct btree_path *path; struct btree_bkey_cached_common *b; static char lock_types[] = { 'r', 'i', 'w' }; + struct task_struct *task = READ_ONCE(trans->locking_wait.task); unsigned l, idx; if (!out->nr_tabstops) { @@ -3034,7 +3051,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans) printbuf_tabstop_push(out, 32); } - prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn); + prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn); trans_for_each_path_safe(trans, path, idx) { if (!path->nodes_locked) diff --git a/libbcachefs/btree_iter.h b/libbcachefs/btree_iter.h index a4fec7cce..75beb1831 100644 --- a/libbcachefs/btree_iter.h +++ b/libbcachefs/btree_iter.h @@ -627,12 +627,12 @@ static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter * return bch2_btree_iter_peek_slot(iter); } +int __bch2_btree_trans_too_many_iters(struct btree_trans *); + static inline int btree_trans_too_many_iters(struct btree_trans *trans) { - if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) { - trace_and_count(trans->c, trans_restart_too_many_iters, trans, _THIS_IP_); - return btree_trans_restart(trans, BCH_ERR_transaction_restart_too_many_iters); - } + if (hweight64(trans->paths_allocated) > BTREE_ITER_MAX - 8) + return __bch2_btree_trans_too_many_iters(trans); return 0; } diff --git a/libbcachefs/btree_key_cache.c b/libbcachefs/btree_key_cache.c index c64f8db06..c5e8a4612 100644 --- a/libbcachefs/btree_key_cache.c +++ b/libbcachefs/btree_key_cache.c @@ -737,7 +737,7 @@ int 
bch2_btree_key_cache_journal_flush(struct journal *j, } six_unlock_read(&ck->c.lock); - ret = commit_do(trans, NULL, NULL, 0, + ret = lockrestart_do(trans, btree_key_cache_flush_pos(trans, key, seq, BCH_TRANS_COMMIT_journal_reclaim, false)); unlock: diff --git a/libbcachefs/btree_locking.c b/libbcachefs/btree_locking.c index 59c57c585..89f14b5a5 100644 --- a/libbcachefs/btree_locking.c +++ b/libbcachefs/btree_locking.c @@ -141,10 +141,28 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g) return false; } +static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans, + unsigned long ip) +{ + struct bch_fs *c = trans->c; + + count_event(c, trans_restart_would_deadlock); + + if (trace_trans_restart_would_deadlock_enabled()) { + struct printbuf buf = PRINTBUF; + + buf.atomic++; + print_cycle(&buf, g); + + trace_trans_restart_would_deadlock(trans, ip, buf.buf); + printbuf_exit(&buf); + } +} + static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i) { if (i == g->g) { - trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_); + trace_would_deadlock(g, i->trans, _RET_IP_); return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock); } else { i->trans->lock_must_abort = true; @@ -265,15 +283,16 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle) unsigned path_idx; int ret; + g.nr = 0; + if (trans->lock_must_abort) { if (cycle) return -1; - trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_); + trace_would_deadlock(&g, trans, _RET_IP_); return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock); } - g.nr = 0; lock_graph_down(&g, trans); next: if (!g.nr) diff --git a/libbcachefs/btree_update_interior.c b/libbcachefs/btree_update_interior.c index 68627061b..c9f07ca49 100644 --- a/libbcachefs/btree_update_interior.c +++ b/libbcachefs/btree_update_interior.c @@ -2279,6 +2279,10 @@ int 
bch2_btree_node_update_key_get_iter(struct btree_trans *trans, BUG_ON(!btree_node_hashed(b)); + struct bch_extent_ptr *ptr; + bch2_bkey_drop_ptrs(bkey_i_to_s(new_key), ptr, + !bch2_bkey_has_device(bkey_i_to_s(&b->key), ptr->dev)); + ret = bch2_btree_node_update_key(trans, &iter, b, new_key, commit_flags, skip_triggers); out: diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c index 6a1915680..6ab265762 100644 --- a/libbcachefs/btree_write_buffer.c +++ b/libbcachefs/btree_write_buffer.c @@ -33,17 +33,32 @@ static int btree_write_buffered_journal_cmp(const void *_l, const void *_r) return cmp_int(l->journal_seq, r->journal_seq); } -static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, - struct btree_iter *iter, - struct btree_write_buffered_key *wb, - unsigned commit_flags, - bool *write_locked, - size_t *fast) +static noinline int wb_flush_one_slowpath(struct btree_trans *trans, + struct btree_iter *iter, + struct btree_write_buffered_key *wb) +{ + bch2_btree_node_unlock_write(trans, iter->path, iter->path->l[0].b); + + trans->journal_res.seq = wb->journal_seq; + + return bch2_trans_update(trans, iter, &wb->k, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: + bch2_trans_commit(trans, NULL, NULL, + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim); +} + +static inline int wb_flush_one(struct btree_trans *trans, struct btree_iter *iter, + struct btree_write_buffered_key *wb, + bool *write_locked, size_t *fast) { struct bch_fs *c = trans->c; struct btree_path *path; int ret; + EBUG_ON(!wb->journal_seq); ret = bch2_btree_iter_traverse(iter); if (ret) return ret; @@ -66,26 +81,14 @@ static int bch2_btree_write_buffer_flush_one(struct btree_trans *trans, *write_locked = true; } - if (!bch2_btree_node_insert_fits(c, path->l[0].b, wb->k.k.u64s)) { - bch2_btree_node_unlock_write(trans, path, path->l[0].b); + if (unlikely(!bch2_btree_node_insert_fits(c, 
path->l[0].b, wb->k.k.u64s))) { *write_locked = false; - goto trans_commit; + return wb_flush_one_slowpath(trans, iter, wb); } bch2_btree_insert_key_leaf(trans, path, &wb->k, wb->journal_seq); (*fast)++; return 0; -trans_commit: - trans->journal_res.seq = wb->journal_seq; - - return bch2_trans_update(trans, iter, &wb->k, - BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, - commit_flags| - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim); } static union btree_write_buffer_state btree_write_buffer_switch(struct btree_write_buffer *wb) @@ -160,9 +163,6 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) keys = wb->keys[s.idx]; nr = s.nr; - if (race_fault()) - goto slowpath; - /* * We first sort so that we can detect and skip redundant updates, and * then we attempt to flush in sorted btree order, as this is most @@ -206,66 +206,68 @@ int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans) iter.path->preserve = false; do { - ret = bch2_btree_write_buffer_flush_one(trans, &iter, i, 0, - &write_locked, &fast); + if (race_fault()) { + ret = -BCH_ERR_journal_reclaim_would_deadlock; + break; + } + + ret = wb_flush_one(trans, &iter, i, &write_locked, &fast); if (!write_locked) bch2_trans_begin(trans); } while (bch2_err_matches(ret, BCH_ERR_transaction_restart)); - if (ret == -BCH_ERR_journal_reclaim_would_deadlock) { + if (!ret) { + i->journal_seq = 0; + } else if (ret == -BCH_ERR_journal_reclaim_would_deadlock) { slowpath++; - continue; - } - if (ret) + ret = 0; + } else break; - - i->journal_seq = 0; } if (write_locked) bch2_btree_node_unlock_write(trans, iter.path, iter.path->l[0].b); bch2_trans_iter_exit(trans, &iter); - trace_write_buffer_flush(trans, nr, skipped, fast, wb->size); - - if (slowpath) - goto slowpath; - + if (ret) + goto err; + + if (slowpath) { + /* + * Flush in the order they were present in the journal, so that + 
* we can release journal pins: + * The fastpath zapped the seq of keys that were successfully flushed so + * we can skip those here. + */ + trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); + + sort(keys, nr, sizeof(keys[0]), + btree_write_buffered_journal_cmp, + NULL); + + for (i = keys; i < keys + nr; i++) { + if (!i->journal_seq) + continue; + + bch2_journal_pin_update(j, i->journal_seq, &pin, + bch2_btree_write_buffer_journal_flush); + + ret = commit_do(trans, NULL, NULL, + BCH_WATERMARK_reclaim| + BCH_TRANS_COMMIT_no_check_rw| + BCH_TRANS_COMMIT_no_enospc| + BCH_TRANS_COMMIT_no_journal_res| + BCH_TRANS_COMMIT_journal_reclaim, + btree_write_buffered_insert(trans, i)); + if (ret) + goto err; + } + } +err: bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret)); -out: + trace_write_buffer_flush(trans, nr, skipped, fast, wb->size); bch2_journal_pin_drop(j, &pin); return ret; -slowpath: - trace_and_count(c, write_buffer_flush_slowpath, trans, slowpath, nr); - - /* - * Now sort the rest by journal seq and bump the journal pin as we go. - * The slowpath zapped the seq of keys that were successfully flushed so - * we can skip those here. 
- */ - sort(keys, nr, sizeof(keys[0]), - btree_write_buffered_journal_cmp, - NULL); - - for (i = keys; i < keys + nr; i++) { - if (!i->journal_seq) - continue; - - bch2_journal_pin_update(j, i->journal_seq, &pin, - bch2_btree_write_buffer_journal_flush); - - ret = commit_do(trans, NULL, NULL, - BCH_WATERMARK_reclaim| - BCH_TRANS_COMMIT_no_check_rw| - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_no_journal_res| - BCH_TRANS_COMMIT_journal_reclaim, - btree_write_buffered_insert(trans, i)); - if (bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret))) - break; - } - - goto out; } int bch2_btree_write_buffer_flush_sync(struct btree_trans *trans) diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c index 64bdafe31..51af8ea23 100644 --- a/libbcachefs/compress.c +++ b/libbcachefs/compress.c @@ -576,7 +576,7 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features) * ZSTD is lying: if we allocate the size of the workspace it says it * requires, it returns memory allocation errors */ - c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams) * 2; + c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams); struct { unsigned feature; diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c index 31090796c..08c664cab 100644 --- a/libbcachefs/data_update.c +++ b/libbcachefs/data_update.c @@ -321,7 +321,7 @@ static int __bch2_data_update_index_update(struct btree_trans *trans, &m->stats->sectors_raced); } - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_fail]); + count_event(c, move_extent_fail); bch2_btree_iter_advance(&iter); goto next; diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c index 8cf4bcf9b..bc6b56628 100644 --- a/libbcachefs/fsck.c +++ b/libbcachefs/fsck.c @@ -207,10 +207,8 @@ static int fsck_write_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 snapshot) { - int ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, - 
__write_inode(trans, inode, snapshot)); + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __write_inode(trans, inode, snapshot)); if (ret) bch_err_fn(trans->c, ret); return ret; @@ -353,9 +351,7 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode, u32 inode_snapshot) { - int ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw| - BCH_TRANS_COMMIT_no_enospc, + int ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, __reattach_inode(trans, inode, inode_snapshot)); bch_err_msg(trans->c, ret, "reattaching inode %llu", inode->bi_inum); return ret; @@ -756,9 +752,7 @@ static int hash_redo_key(struct btree_trans *trans, k.k->p.snapshot, tmp, BCH_HASH_SET_MUST_CREATE, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: - bch2_trans_commit(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw); + bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); } static int hash_check_key(struct btree_trans *trans, @@ -879,7 +873,7 @@ static int check_inode(struct btree_trans *trans, c, inode_snapshot_mismatch, "inodes in different snapshots don't match")) { bch_err(c, "repair not implemented yet"); - return -EINVAL; + return -BCH_ERR_fsck_repair_unimplemented; } if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && @@ -905,12 +899,13 @@ static int check_inode(struct btree_trans *trans, if (u.bi_flags & BCH_INODE_unlinked && c->sb.version >= bcachefs_metadata_version_deleted_inodes) { ret = check_inode_deleted_list(trans, k.k->p); - if (ret) + if (ret < 0) return ret; fsck_err_on(ret, c, unlinked_inode_not_on_deleted_list, "inode %llu:%u unlinked, but not on deleted list", u.bi_inum, k.k->p.snapshot); + ret = 0; } if (u.bi_flags & BCH_INODE_unlinked && @@ -918,9 +913,6 @@ static int check_inode(struct btree_trans *trans, fsck_err(c, inode_unlinked_but_clean, "filesystem marked clean, but inode %llu unlinked", u.bi_inum))) { - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - 
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); bch_err_msg(c, ret, "in fsck deleting inode"); return ret; @@ -933,9 +925,6 @@ static int check_inode(struct btree_trans *trans, u.bi_inum))) { bch_verbose(c, "truncating inode %llu", u.bi_inum); - bch2_trans_unlock(trans); - bch2_fs_lazy_rw(c); - /* * XXX: need to truncate partial blocks too here - or ideally * just switch units to bytes and that issue goes away @@ -999,7 +988,6 @@ static int check_inode(struct btree_trans *trans, return ret; } -noinline_for_stack int bch2_check_inodes(struct bch_fs *c) { bool full = c->opts.fsck; @@ -1015,7 +1003,7 @@ int bch2_check_inodes(struct bch_fs *c) ret = for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_inode(trans, &iter, k, &prev, &s, full)); snapshots_seen_exit(&s); @@ -1229,8 +1217,7 @@ static int overlapping_extents_found(struct btree_trans *trans, ret = bch2_trans_update_extent_overwrite(trans, old_iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, k1, k2) ?: - bch2_trans_commit(trans, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc); + bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); bch2_disk_reservation_put(c, &res); if (ret) @@ -1469,7 +1456,7 @@ int bch2_check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ + BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: check_extent_overbig(trans, &iter, k); @@ -1498,7 +1485,7 @@ int bch2_check_indirect_extents(struct bch_fs *c) POS_MIN, BTREE_ITER_PREFETCH, k, &res, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, ({ + BCH_TRANS_COMMIT_no_enospc, ({ bch2_disk_reservation_put(c, &res); 
check_extent_overbig(trans, &iter, k); })); @@ -1871,7 +1858,7 @@ int bch2_check_dirents(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + BCH_TRANS_COMMIT_no_enospc, check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)); bch2_trans_put(trans); @@ -1935,7 +1922,7 @@ int bch2_check_xattrs(struct bch_fs *c) BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, NULL, NULL, - BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); bch_err_fn(c, ret); return ret; @@ -1966,8 +1953,7 @@ static int check_root_trans(struct btree_trans *trans) root_subvol.v.snapshot = cpu_to_le32(snapshot); root_subvol.v.inode = cpu_to_le64(inum); ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0)); bch_err_msg(c, ret, "writing root subvol"); @@ -2002,9 +1988,7 @@ int bch2_check_root(struct bch_fs *c) { int ret; - ret = bch2_trans_do(c, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_root_trans(trans)); bch_err_fn(c, ret); return ret; @@ -2133,8 +2117,7 @@ static int check_path(struct btree_trans *trans, return 0; ret = commit_do(trans, NULL, NULL, - BCH_TRANS_COMMIT_no_enospc| - BCH_TRANS_COMMIT_lazy_rw, + BCH_TRANS_COMMIT_no_enospc, remove_backpointer(trans, inode)); if (ret) { bch_err(c, "error removing dirent: %i", ret); @@ -2415,7 +2398,7 @@ static int check_nlinks_update_hardlinks(struct bch_fs *c, for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS(0, range_start), BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_nlinks_update_inode(trans, &iter, 
k, links, &idx, range_end))); if (ret < 0) { bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); @@ -2500,7 +2483,7 @@ int bch2_fix_reflink_p(struct bch_fs *c) BTREE_ID_extents, POS_MIN, BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| BTREE_ITER_ALL_SNAPSHOTS, k, - NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, fix_reflink_p_key(trans, &iter, k))); bch_err_fn(c, ret); return ret; diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c index 3281c4dd1..4c9eaf7ce 100644 --- a/libbcachefs/io_read.c +++ b/libbcachefs/io_read.c @@ -80,7 +80,7 @@ struct promote_op { struct bpos pos; struct data_update write; - struct bio_vec bi_inline_vecs[0]; /* must be last */ + struct bio_vec bi_inline_vecs[]; /* must be last */ }; static const struct rhashtable_params bch_promote_params = { diff --git a/libbcachefs/io_write.c b/libbcachefs/io_write.c index d6bd8f788..7c904f7d5 100644 --- a/libbcachefs/io_write.c +++ b/libbcachefs/io_write.c @@ -403,8 +403,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, BUG_ON(c->opts.nochanges); bkey_for_each_ptr(ptrs, ptr) { - BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || - !c->devs[ptr->dev]); + BUG_ON(!bch2_dev_exists2(c, ptr->dev)); ca = bch_dev_bkey_exists(c, ptr->dev); diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c index d5540c856..acf9c358b 100644 --- a/libbcachefs/journal.c +++ b/libbcachefs/journal.c @@ -323,6 +323,8 @@ static int journal_entry_open(struct journal *j) atomic64_inc(&j->seq); journal_pin_list_init(fifo_push_ref(&j->pin), 1); + BUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); + BUG_ON(j->buf + (journal_cur_seq(j) & JOURNAL_BUF_MASK) != buf); bkey_extent_init(&buf->key); diff --git a/libbcachefs/journal.h b/libbcachefs/journal.h index c85d01cf4..e1e9e60fa 100644 --- a/libbcachefs/journal.h +++ b/libbcachefs/journal.h @@ -119,7 +119,6 @@ static inline void journal_wake(struct journal *j) { wake_up(&j->wait); 
closure_wake_up(&j->async_wait); - closure_wake_up(&j->preres_wait); } static inline struct journal_buf *journal_cur_buf(struct journal *j) @@ -136,9 +135,7 @@ static inline u64 journal_last_seq(struct journal *j) static inline u64 journal_cur_seq(struct journal *j) { - EBUG_ON(j->pin.back - 1 != atomic64_read(&j->seq)); - - return j->pin.back - 1; + return atomic64_read(&j->seq); } static inline u64 journal_last_unwritten_seq(struct journal *j) diff --git a/libbcachefs/journal_types.h b/libbcachefs/journal_types.h index 2427cce64..4ffae252e 100644 --- a/libbcachefs/journal_types.h +++ b/libbcachefs/journal_types.h @@ -195,7 +195,6 @@ struct journal { /* Used when waiting because the journal was full */ wait_queue_head_t wait; struct closure_waitlist async_wait; - struct closure_waitlist preres_wait; struct closure io; struct delayed_work write_work; diff --git a/libbcachefs/move.c b/libbcachefs/move.c index db14ec376..3efe6a6e1 100644 --- a/libbcachefs/move.c +++ b/libbcachefs/move.c @@ -70,7 +70,7 @@ struct moving_io { struct data_update write; /* Must be last since it is variable size */ - struct bio_vec bi_inline_vecs[0]; + struct bio_vec bi_inline_vecs[]; }; static void move_free(struct moving_io *io) @@ -345,7 +345,12 @@ int bch2_move_extent(struct moving_context *ctxt, if (ret == -BCH_ERR_data_update_done) return 0; - this_cpu_inc(c->counters[BCH_COUNTER_move_extent_start_fail]); + if (bch2_err_matches(ret, EROFS) || + bch2_err_matches(ret, BCH_ERR_transaction_restart)) + return ret; + + count_event(c, move_extent_start_fail); + if (trace_move_extent_start_fail_enabled()) { struct printbuf buf = PRINTBUF; @@ -461,7 +466,8 @@ int bch2_move_ratelimit(struct moving_context *ctxt) if (delay) move_ctxt_wait_event_timeout(ctxt, - freezing(current) || kthread_should_stop(), + freezing(current) || + kthread_should_stop(), delay); if (unlikely(freezing(current))) { @@ -680,6 +686,9 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt, goto err; while (!(ret = 
bch2_move_ratelimit(ctxt))) { + if (kthread_should_stop()) + break; + bch2_trans_begin(trans); ret = bch2_get_next_backpointer(trans, bucket, gen, diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c index d079ba7aa..794e4c56e 100644 --- a/libbcachefs/movinggc.c +++ b/libbcachefs/movinggc.c @@ -217,7 +217,7 @@ static int bch2_copygc(struct moving_context *ctxt, goto err; darray_for_each(buckets, i) { - if (unlikely(freezing(current))) + if (kthread_should_stop() || freezing(current)) break; f = move_bucket_in_flight_add(buckets_in_flight, *i); diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c index 3f8c3ba10..98f1454c2 100644 --- a/libbcachefs/recovery.c +++ b/libbcachefs/recovery.c @@ -531,7 +531,7 @@ static int bch2_set_may_go_rw(struct bch_fs *c) keys->gap = keys->nr; set_bit(BCH_FS_may_go_rw, &c->flags); - if (keys->nr) + if (keys->nr || c->opts.fsck) return bch2_fs_read_write_early(c); return 0; } diff --git a/libbcachefs/replicas_types.h b/libbcachefs/replicas_types.h index 030324078..ac90d142c 100644 --- a/libbcachefs/replicas_types.h +++ b/libbcachefs/replicas_types.h @@ -21,7 +21,7 @@ struct replicas_delta_list { u64 nr_inodes; u64 persistent_reserved[BCH_REPLICAS_MAX]; struct {} memset_end; - struct replicas_delta d[0]; + struct replicas_delta d[]; }; #endif /* _BCACHEFS_REPLICAS_TYPES_H */ diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c index e473c788f..b2d216fa7 100644 --- a/libbcachefs/snapshot.c +++ b/libbcachefs/snapshot.c @@ -590,7 +590,7 @@ int bch2_check_snapshot_trees(struct bch_fs *c) for_each_btree_key_commit(trans, iter, BTREE_ID_snapshot_trees, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_snapshot_tree(trans, &iter, k))); if (ret) @@ -868,7 +868,7 @@ int bch2_check_snapshots(struct bch_fs *c) for_each_btree_key_reverse_commit(trans, iter, BTREE_ID_snapshots, POS_MAX, BTREE_ITER_PREFETCH, k, - NULL, NULL, 
BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_snapshot(trans, &iter, k))); if (ret) bch_err_fn(c, ret); diff --git a/libbcachefs/subvolume.c b/libbcachefs/subvolume.c index 1cbf9e3a0..4e258b7d1 100644 --- a/libbcachefs/subvolume.c +++ b/libbcachefs/subvolume.c @@ -37,8 +37,6 @@ static int check_subvol(struct btree_trans *trans, return ret; if (BCH_SUBVOLUME_UNLINKED(subvol.v)) { - bch2_fs_lazy_rw(c); - ret = bch2_subvolume_delete(trans, iter->pos.offset); if (ret) bch_err_msg(c, ret, "deleting subvolume %llu", iter->pos.offset); @@ -89,7 +87,7 @@ int bch2_check_subvols(struct bch_fs *c) ret = bch2_trans_run(c, for_each_btree_key_commit(trans, iter, BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, - NULL, NULL, BCH_TRANS_COMMIT_lazy_rw|BCH_TRANS_COMMIT_no_enospc, + NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_subvol(trans, &iter, k))); if (ret) bch_err_fn(c, ret); diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h index 6eced95ce..6e2ad6f3d 100644 --- a/libbcachefs/trace.h +++ b/libbcachefs/trace.h @@ -32,7 +32,7 @@ DECLARE_EVENT_CLASS(bpos, TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot) ); -DECLARE_EVENT_CLASS(str, +DECLARE_EVENT_CLASS(fs_str, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str), @@ -49,6 +49,29 @@ DECLARE_EVENT_CLASS(str, TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str)) ); +DECLARE_EVENT_CLASS(trans_str, + TP_PROTO(struct btree_trans *trans, unsigned long caller_ip, const char *str), + TP_ARGS(trans, caller_ip, str), + + TP_STRUCT__entry( + __field(dev_t, dev ) + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __string(str, str ) + ), + + TP_fast_assign( + __entry->dev = trans->c->dev; + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __assign_str(str, str); + ), + + TP_printk("%d,%d %s %pS %s", + MAJOR(__entry->dev), 
MINOR(__entry->dev), + __entry->trans_fn, (void *) __entry->caller_ip, __get_str(str)) +); + DECLARE_EVENT_CLASS(btree_node, TP_PROTO(struct bch_fs *c, struct btree *b), TP_ARGS(c, b), @@ -738,22 +761,22 @@ TRACE_EVENT(bucket_evacuate, __entry->dev_idx, __entry->bucket) ); -DEFINE_EVENT(str, move_extent, +DEFINE_EVENT(fs_str, move_extent, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_read, +DEFINE_EVENT(fs_str, move_extent_read, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_write, +DEFINE_EVENT(fs_str, move_extent_write, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); -DEFINE_EVENT(str, move_extent_finish, +DEFINE_EVENT(fs_str, move_extent_finish, TP_PROTO(struct bch_fs *c, const char *k), TP_ARGS(c, k) ); @@ -775,7 +798,7 @@ TRACE_EVENT(move_extent_fail, TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg)) ); -DEFINE_EVENT(str, move_extent_start_fail, +DEFINE_EVENT(fs_str, move_extent_start_fail, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); @@ -1008,10 +1031,11 @@ DEFINE_EVENT(transaction_event, trans_restart_key_cache_raced, TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, trans_restart_too_many_iters, +DEFINE_EVENT(trans_str, trans_restart_too_many_iters, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + const char *paths), + TP_ARGS(trans, caller_ip, paths) ); DECLARE_EVENT_CLASS(transaction_restart_iter, @@ -1181,10 +1205,11 @@ DEFINE_EVENT(transaction_restart_iter, trans_restart_memory_allocation_failure, TP_ARGS(trans, caller_ip, path) ); -DEFINE_EVENT(transaction_event, trans_restart_would_deadlock, +DEFINE_EVENT(trans_str, trans_restart_would_deadlock, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + const char *cycle), + TP_ARGS(trans, caller_ip, cycle) ); 
DEFINE_EVENT(transaction_event, trans_restart_would_deadlock_recursion_limit, @@ -1368,12 +1393,12 @@ TRACE_EVENT(write_buffer_flush_slowpath, TP_printk("%zu/%zu", __entry->slowpath, __entry->total) ); -DEFINE_EVENT(str, rebalance_extent, +DEFINE_EVENT(fs_str, rebalance_extent, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) ); -DEFINE_EVENT(str, data_update, +DEFINE_EVENT(fs_str, data_update, TP_PROTO(struct bch_fs *c, const char *str), TP_ARGS(c, str) );