diff --git a/.bcachefs_revision b/.bcachefs_revision
index 4649f2ba2..97936a157 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-783085c3cc440183ba5e987b1aa7791cc1ca42ba
+8c94740b1bf8645d3398170f41c9c88b78332252
diff --git a/libbcachefs/alloc_background.c b/libbcachefs/alloc_background.c
index 1ed8506c3..56a18ace8 100644
--- a/libbcachefs/alloc_background.c
+++ b/libbcachefs/alloc_background.c
@@ -261,10 +261,8 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
 	case BCH_DATA_free:
 	case BCH_DATA_need_gc_gens:
 	case BCH_DATA_need_discard:
-		bkey_fsck_err_on(a.v->dirty_sectors ||
-				 a.v->cached_sectors ||
-				 a.v->stripe, c, err,
-				 alloc_key_empty_but_have_data,
+		bkey_fsck_err_on(bch2_bucket_sectors(*a.v) || a.v->stripe,
+				 c, err, alloc_key_empty_but_have_data,
 				 "empty data type free but have data");
 		break;
 	case BCH_DATA_sb:
@@ -272,22 +270,21 @@ int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k,
 	case BCH_DATA_journal:
 	case BCH_DATA_btree:
 	case BCH_DATA_user:
 	case BCH_DATA_parity:
-		bkey_fsck_err_on(!a.v->dirty_sectors, c, err,
-				 alloc_key_dirty_sectors_0,
+		bkey_fsck_err_on(!bch2_bucket_sectors_dirty(*a.v),
+				 c, err, alloc_key_dirty_sectors_0,
 				 "data_type %s but dirty_sectors==0",
 				 bch2_data_types[a.v->data_type]);
 		break;
 	case BCH_DATA_cached:
 		bkey_fsck_err_on(!a.v->cached_sectors ||
-				 a.v->dirty_sectors ||
-				 a.v->stripe, c, err,
-				 alloc_key_cached_inconsistency,
+				 bch2_bucket_sectors_dirty(*a.v) ||
+				 a.v->stripe,
+				 c, err, alloc_key_cached_inconsistency,
 				 "data type inconsistency");
 		bkey_fsck_err_on(!a.v->io_time[READ] &&
 				 c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs,
-				 c, err,
-				 alloc_key_cached_but_read_time_zero,
+				 c, err, alloc_key_cached_but_read_time_zero,
 				 "cached bucket with read_time == 0");
 		break;
 	case BCH_DATA_stripe:
@@ -790,8 +787,7 @@ int bch2_trans_mark_alloc(struct btree_trans *trans,
 
 	new_a->data_type = alloc_data_type(*new_a, new_a->data_type);
 
-	if (new_a->dirty_sectors > old_a->dirty_sectors ||
-	    new_a->cached_sectors > old_a->cached_sectors) {
+	if (bch2_bucket_sectors(*new_a) > bch2_bucket_sectors(*old_a)) {
 		new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now));
 		new_a->io_time[WRITE]= max_t(u64, 1, atomic64_read(&c->io_clock[WRITE].now));
 		SET_BCH_ALLOC_V4_NEED_INC_GEN(new_a, true);
@@ -1509,6 +1505,27 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	if (a->data_type != BCH_DATA_cached)
 		return 0;
 
+	if (fsck_err_on(!a->io_time[READ], c,
+			alloc_key_cached_but_read_time_zero,
+			"cached bucket with read_time 0\n"
+			"  %s",
+		(printbuf_reset(&buf),
+		 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
+		struct bkey_i_alloc_v4 *a_mut =
+			bch2_alloc_to_v4_mut(trans, alloc_k);
+		ret = PTR_ERR_OR_ZERO(a_mut);
+		if (ret)
+			goto err;
+
+		a_mut->v.io_time[READ] = atomic64_read(&c->io_clock[READ].now);
+		ret = bch2_trans_update(trans, alloc_iter,
+					&a_mut->k_i, BTREE_TRIGGER_NORUN);
+		if (ret)
+			goto err;
+
+		a = &a_mut->v;
+	}
+
 	lru_k = bch2_bkey_get_iter(trans, &lru_iter, BTREE_ID_lru,
 			lru_pos(alloc_k.k->p.inode,
 				bucket_to_u64(alloc_k.k->p),
@@ -1517,41 +1534,18 @@ static int bch2_check_alloc_to_lru_ref(struct btree_trans *trans,
 	if (ret)
 		return ret;
 
-	if (fsck_err_on(!a->io_time[READ], c,
-			alloc_key_cached_but_read_time_zero,
-			"cached bucket with read_time 0\n"
-			"  %s",
-		(printbuf_reset(&buf),
-		 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf)) ||
-	    fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
+	if (fsck_err_on(lru_k.k->type != KEY_TYPE_set, c,
 			alloc_key_to_missing_lru_entry,
 			"missing lru entry\n"
 			"  %s",
 			(printbuf_reset(&buf),
 			 bch2_bkey_val_to_text(&buf, c, alloc_k), buf.buf))) {
-		u64 read_time = a->io_time[READ] ?:
-			atomic64_read(&c->io_clock[READ].now);
-
 		ret = bch2_lru_set(trans, alloc_k.k->p.inode,
 				   bucket_to_u64(alloc_k.k->p),
-				   read_time);
+				   a->io_time[READ]);
 		if (ret)
 			goto err;
-
-		if (a->io_time[READ] != read_time) {
-			struct bkey_i_alloc_v4 *a_mut =
-				bch2_alloc_to_v4_mut(trans, alloc_k);
-			ret = PTR_ERR_OR_ZERO(a_mut);
-			if (ret)
-				goto err;
-
-			a_mut->v.io_time[READ] = read_time;
-			ret = bch2_trans_update(trans, alloc_iter,
-					&a_mut->k_i, BTREE_TRIGGER_NORUN);
-			if (ret)
-				goto err;
-		}
 	}
 err:
 fsck_err:
@@ -1564,15 +1558,13 @@ int bch2_check_alloc_to_lru_refs(struct bch_fs *c)
 {
 	struct btree_iter iter;
 	struct bkey_s_c k;
-	int ret = 0;
 
-	ret = bch2_trans_run(c,
+	int ret = bch2_trans_run(c,
 		for_each_btree_key_commit(trans, iter, BTREE_ID_alloc,
 				POS_MIN, BTREE_ITER_PREFETCH, k,
 				NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
 			bch2_check_alloc_to_lru_ref(trans, &iter)));
-	if (ret)
-		bch_err_fn(c, ret);
+	bch_err_fn(c, ret);
 	return ret;
 }
@@ -1734,28 +1726,25 @@ void bch2_do_discards(struct bch_fs *c)
 static int invalidate_one_bucket(struct btree_trans *trans,
 				 struct btree_iter *lru_iter,
 				 struct bkey_s_c lru_k,
+				 struct bpos *last_flushed_pos,
 				 s64 *nr_to_invalidate)
 {
 	struct bch_fs *c = trans->c;
-	struct btree_iter alloc_iter = { NULL };
-	struct bkey_i_alloc_v4 *a = NULL;
-	struct printbuf buf = PRINTBUF;
-	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
-	unsigned cached_sectors;
 	int ret = 0;
 
 	if (*nr_to_invalidate <= 0)
 		return 1;
 
-	if (!bch2_dev_bucket_exists(c, bucket)) {
-		prt_str(&buf, "lru entry points to invalid bucket");
-		goto err;
-	}
+	ret = bch2_check_lru_key(trans, lru_iter, lru_k, last_flushed_pos);
+	if (ret)
+		return ret < 0 ? ret : 0;
 
+	struct bpos bucket = u64_to_bucket(lru_k.k->p.offset);
 	if (bch2_bucket_is_open_safe(c, bucket.inode, bucket.offset))
 		return 0;
 
-	a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
+	struct btree_iter alloc_iter;
+	struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, &alloc_iter, bucket);
 	ret = PTR_ERR_OR_ZERO(a);
 	if (ret)
 		goto out;
@@ -1769,7 +1758,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	if (!a->v.cached_sectors)
 		bch_err(c, "invalidating empty bucket, confused");
 
-	cached_sectors = a->v.cached_sectors;
+	unsigned cached_sectors = a->v.cached_sectors;
 
 	SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false);
 	a->v.gen++;
@@ -1791,28 +1780,7 @@ static int invalidate_one_bucket(struct btree_trans *trans,
 	--*nr_to_invalidate;
 out:
 	bch2_trans_iter_exit(trans, &alloc_iter);
-	printbuf_exit(&buf);
 	return ret;
-err:
-	prt_str(&buf, "\n  lru key: ");
-	bch2_bkey_val_to_text(&buf, c, lru_k);
-
-	prt_str(&buf, "\n  lru entry: ");
-	bch2_lru_pos_to_text(&buf, lru_iter->pos);
-
-	prt_str(&buf, "\n  alloc key: ");
-	if (!a)
-		bch2_bpos_to_text(&buf, bucket);
-	else
-		bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i));
-
-	bch_err(c, "%s", buf.buf);
-	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_lrus) {
-		bch2_inconsistent_error(c);
-		ret = -EINVAL;
-	}
-
-	goto out;
 }
 
 static void bch2_do_invalidates_work(struct work_struct *work)
@@ -1822,6 +1790,7 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 	struct btree_trans *trans = bch2_trans_get(c);
 	struct btree_iter iter;
 	struct bkey_s_c k;
+	struct bpos last_flushed_pos = POS_MIN;
 	unsigned i;
 	int ret = 0;
 
@@ -1837,7 +1806,8 @@ static void bch2_do_invalidates_work(struct work_struct *work)
 				lru_pos(ca->dev_idx, 0, 0),
 				lru_pos(ca->dev_idx, U64_MAX, LRU_TIME_MAX),
 				BTREE_ITER_INTENT, k,
-			invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate));
+			invalidate_one_bucket(trans, &iter, k, &last_flushed_pos,
+					&nr_to_invalidate));
 
 		if (ret < 0) {
 			percpu_ref_put(&ca->ref);
diff --git a/libbcachefs/alloc_background.h b/libbcachefs/alloc_background.h
index 73faf99a2..72bb8491f 100644
--- a/libbcachefs/alloc_background.h
+++ b/libbcachefs/alloc_background.h
@@ -71,6 +71,24 @@ static inline enum bch_data_type bucket_data_type(enum bch_data_type data_type)
 	return data_type == BCH_DATA_stripe ? BCH_DATA_user : data_type;
 }
 
+static inline unsigned bch2_bucket_sectors(struct bch_alloc_v4 a)
+{
+	return a.dirty_sectors + a.cached_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_dirty(struct bch_alloc_v4 a)
+{
+	return a.dirty_sectors;
+}
+
+static inline unsigned bch2_bucket_sectors_fragmented(struct bch_dev *ca,
+						      struct bch_alloc_v4 a)
+{
+	unsigned d = bch2_bucket_sectors_dirty(a);
+
+	return d ? max(0U, ca->mi.bucket_size - d) : 0;
+}
+
 static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a)
 {
 	return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0;
@@ -90,10 +108,11 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a,
 						 struct bch_dev *ca)
 {
 	if (!data_type_movable(a.data_type) ||
-	    a.dirty_sectors >= ca->mi.bucket_size)
+	    !bch2_bucket_sectors_fragmented(ca, a))
 		return 0;
 
-	return div_u64((u64) a.dirty_sectors * (1ULL << 31), ca->mi.bucket_size);
+	u64 d = bch2_bucket_sectors_dirty(a);
+	return div_u64(d * (1ULL << 31), ca->mi.bucket_size);
 }
 
 static inline u64 alloc_freespace_genbits(struct bch_alloc_v4 a)
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index eef6fa8d0..1ba0eeb75 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1345,6 +1345,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
 	int ret;
 	int i;
 
+	if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+		erasure_code = false;
+
 	BUG_ON(flags & BCH_WRITE_ONLY_SPECIFIED_DEVS);
 
 	BUG_ON(!nr_replicas || !nr_replicas_required);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 53f93f03f..295efeda1 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -935,7 +935,7 @@ struct bch_fs {
 	mempool_t		compression_bounce[2];
 	mempool_t		compress_workspace[BCH_COMPRESSION_TYPE_NR];
 	mempool_t		decompress_workspace;
-	ZSTD_parameters		zstd_params;
+	size_t			zstd_workspace_size;
 
 	struct crypto_shash	*sha256;
 	struct crypto_sync_skcipher *chacha20;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index ad0f298c8..967780072 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -151,7 +151,11 @@ struct bpos {
 #else
 #error edit for your odd byteorder.
 #endif
-} __packed __aligned(4);
+} __packed
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+__aligned(4)
+#endif
+;
 
 #define KEY_INODE_MAX			((__u64)~0ULL)
 #define KEY_OFFSET_MAX			((__u64)~0ULL)
@@ -2203,8 +2207,8 @@ struct jset_entry_dev_usage {
 	__le32			dev;
 	__u32			pad;
 
-	__le64			buckets_ec;
-	__le64			_buckets_unavailable; /* No longer used */
+	__le64			_buckets_ec;		/* No longer used */
+	__le64			_buckets_unavailable;	/* No longer used */
 
 	struct jset_entry_dev_usage_type d[];
 };
diff --git a/libbcachefs/bcachefs_ioctl.h b/libbcachefs/bcachefs_ioctl.h
index 18eb32540..44ba7a87a 100644
--- a/libbcachefs/bcachefs_ioctl.h
+++ b/libbcachefs/bcachefs_ioctl.h
@@ -81,6 +81,8 @@ struct bch_ioctl_incremental {
 #define BCH_IOCTL_SUBVOLUME_CREATE _IOW(0xbc, 16, struct bch_ioctl_subvolume)
 #define BCH_IOCTL_SUBVOLUME_DESTROY _IOW(0xbc, 17, struct bch_ioctl_subvolume)
 
+#define BCH_IOCTL_DEV_USAGE_V2 _IOWR(0xbc, 18, struct bch_ioctl_dev_usage_v2)
+
 /* ioctl below act on a particular file, not the filesystem as a whole: */
 
 #define BCHFS_IOC_REINHERIT_ATTRS	_IOR(0xbc, 64, const char __user *)
@@ -298,7 +300,20 @@ struct bch_ioctl_dev_usage {
 		__u64		buckets;
 		__u64		sectors;
 		__u64		fragmented;
-	}			d[BCH_DATA_NR];
+	}			d[10];
+};
+
+struct bch_ioctl_dev_usage_v2 {
+	__u64			dev;
+	__u32			flags;
+	__u8			state;
+	__u8			nr_data_types;
+	__u8			pad[6];
+
+	__u32			bucket_size;
+	__u64			nr_buckets;
+
+	struct bch_ioctl_dev_usage_type d[0];
 };
 
 /*
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 7e5d52f8f..90f5bcfa3 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -1254,9 +1254,6 @@ static int bch2_gc_done(struct bch_fs *c,
 			copy_dev_field(dev_usage_fragmented_wrong,
 				       d[i].fragmented, "%s fragmented", bch2_data_types[i]);
 		}
-
-		copy_dev_field(dev_usage_buckets_ec_wrong,
-			       buckets_ec, "buckets_ec");
 	}
 
 	{
diff --git a/libbcachefs/btree_trans_commit.c b/libbcachefs/btree_trans_commit.c
index 09e94cc46..7210d5c22 100644
--- a/libbcachefs/btree_trans_commit.c
+++ b/libbcachefs/btree_trans_commit.c
@@ -361,7 +361,6 @@ noinline static int
 btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
 				     struct btree_path *path, unsigned new_u64s)
 {
-	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
 	struct bkey_cached *ck = (void *) path->l[0].b;
 	struct bkey_i *new_k;
@@ -372,7 +371,7 @@ btree_key_can_insert_cached_slowpath(struct btree_trans *trans, unsigned flags,
 
 	new_k = kmalloc(new_u64s * sizeof(u64), GFP_KERNEL);
 	if (!new_k) {
-		bch_err(c, "error allocating memory for key cache key, btree %s u64s %u",
+		bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u",
 			bch2_btree_id_str(path->btree_id), new_u64s);
 		return -BCH_ERR_ENOMEM_btree_key_cache_insert;
 	}
diff --git a/libbcachefs/btree_write_buffer.c b/libbcachefs/btree_write_buffer.c
index 0c2db1fab..d3c38d2c0 100644
--- a/libbcachefs/btree_write_buffer.c
+++ b/libbcachefs/btree_write_buffer.c
@@ -29,14 +29,12 @@ static inline bool wb_key_cmp(const struct wb_key_ref *l, const struct wb_key_re
 #ifdef CONFIG_X86_64
 	int cmp;
 
-	asm(".intel_syntax noprefix;"
-	    "mov rax, [%[l]];"
-	    "sub rax, [%[r]];"
-	    "mov rax, [%[l] + 8];"
-	    "sbb rax, [%[r] + 8];"
-	    "mov rax, [%[l] + 16];"
-	    "sbb rax, [%[r] + 16];"
-	    ".att_syntax prefix;"
+	asm("mov   (%[l]), %%rax;"
+	    "sub   (%[r]), %%rax;"
+	    "mov  8(%[l]), %%rax;"
+	    "sbb  8(%[r]), %%rax;"
+	    "mov 16(%[l]), %%rax;"
+	    "sbb 16(%[r]), %%rax;"
 	    : "=@ccae" (cmp)
 	    : [l] "r" (l), [r] "r" (r)
 	    : "rax", "cc");
@@ -297,7 +295,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
 			struct btree_write_buffered_key *n = &wb->flushing.keys.data[i[1].idx];
 
 			skipped++;
-			n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);;
+			n->journal_seq = min_t(u64, n->journal_seq, k->journal_seq);
 			k->journal_seq = 0;
 			continue;
 		}
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index 5bfa102a0..50eb6ba2f 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -277,12 +277,28 @@ void bch2_dev_usage_init(struct bch_dev *ca)
 	ca->usage_base->d[BCH_DATA_free].buckets = ca->mi.nbuckets - ca->mi.first_bucket;
 }
 
-static inline int bucket_sectors_fragmented(struct bch_dev *ca,
-					    struct bch_alloc_v4 a)
+void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage)
 {
-	return a.dirty_sectors
-		? max(0, (int) ca->mi.bucket_size - (int) a.dirty_sectors)
-		: 0;
+	prt_tab(out);
+	prt_str(out, "buckets");
+	prt_tab_rjust(out);
+	prt_str(out, "sectors");
+	prt_tab_rjust(out);
+	prt_str(out, "fragmented");
+	prt_tab_rjust(out);
+	prt_newline(out);
+
+	for (unsigned i = 0; i < BCH_DATA_NR; i++) {
+		prt_str(out, bch2_data_types[i]);
+		prt_tab(out);
+		prt_u64(out, usage->d[i].buckets);
+		prt_tab_rjust(out);
+		prt_u64(out, usage->d[i].sectors);
+		prt_tab_rjust(out);
+		prt_u64(out, usage->d[i].fragmented);
+		prt_tab_rjust(out);
+		prt_newline(out);
+	}
 }
 
 static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
@@ -306,41 +322,37 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	u->d[old.data_type].buckets--;
 	u->d[new.data_type].buckets++;
 
-	u->buckets_ec -= (int) !!old.stripe;
-	u->buckets_ec += (int) !!new.stripe;
-
-	u->d[old.data_type].sectors -= old.dirty_sectors;
-	u->d[new.data_type].sectors += new.dirty_sectors;
+	u->d[old.data_type].sectors -= bch2_bucket_sectors_dirty(old);
+	u->d[new.data_type].sectors += bch2_bucket_sectors_dirty(new);
 
 	u->d[BCH_DATA_cached].sectors += new.cached_sectors;
 	u->d[BCH_DATA_cached].sectors -= old.cached_sectors;
 
-	u->d[old.data_type].fragmented -= bucket_sectors_fragmented(ca, old);
-	u->d[new.data_type].fragmented += bucket_sectors_fragmented(ca, new);
+	u->d[old.data_type].fragmented -= bch2_bucket_sectors_fragmented(ca, old);
+	u->d[new.data_type].fragmented += bch2_bucket_sectors_fragmented(ca, new);
 
 	preempt_enable();
 }
 
+struct bch_alloc_v4 bucket_m_to_alloc(struct bucket b)
+{
+	return (struct bch_alloc_v4) {
+		.gen		= b.gen,
+		.data_type	= b.data_type,
+		.dirty_sectors	= b.dirty_sectors,
+		.cached_sectors	= b.cached_sectors,
+		.stripe		= b.stripe,
+	};
+}
+
 static void bch2_dev_usage_update_m(struct bch_fs *c, struct bch_dev *ca,
 				    struct bucket old, struct bucket new,
 				    u64 journal_seq, bool gc)
 {
-	struct bch_alloc_v4 old_a = {
-		.gen		= old.gen,
-		.data_type	= old.data_type,
-		.dirty_sectors	= old.dirty_sectors,
-		.cached_sectors	= old.cached_sectors,
-		.stripe		= old.stripe,
-	};
-	struct bch_alloc_v4 new_a = {
-		.gen		= new.gen,
-		.data_type	= new.data_type,
-		.dirty_sectors	= new.dirty_sectors,
-		.cached_sectors	= new.cached_sectors,
-		.stripe		= new.stripe,
-	};
-
-	bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, gc);
+	bch2_dev_usage_update(c, ca,
+			      bucket_m_to_alloc(old),
+			      bucket_m_to_alloc(new),
+			      journal_seq, gc);
 }
 
 static inline int __update_replicas(struct bch_fs *c,
@@ -640,7 +652,6 @@ int bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 		goto err;
 	}
 
-	g->data_type = data_type;
 	g->dirty_sectors += sectors;
 	new = *g;
@@ -657,14 +668,11 @@ static int check_bucket_ref(struct btree_trans *trans,
 			    const struct bch_extent_ptr *ptr,
 			    s64 sectors, enum bch_data_type ptr_data_type,
 			    u8 b_gen, u8 bucket_data_type,
-			    u32 dirty_sectors, u32 cached_sectors)
+			    u32 bucket_sectors)
 {
 	struct bch_fs *c = trans->c;
 	struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
 	size_t bucket_nr = PTR_BUCKET_NR(ca, ptr);
-	u32 bucket_sectors = !ptr->cached
-		? dirty_sectors
-		: cached_sectors;
 	struct printbuf buf = PRINTBUF;
 	int ret = 0;
@@ -799,7 +807,7 @@ static int mark_stripe_bucket(struct btree_trans *trans,
 
 	ret = check_bucket_ref(trans, k, ptr, sectors, data_type,
 			       g->gen, g->data_type,
-			       g->dirty_sectors, g->cached_sectors);
+			       g->dirty_sectors);
 	if (ret)
 		goto err;
@@ -829,8 +837,7 @@ static int __mark_pointer(struct btree_trans *trans,
 		? dirty_sectors
 		: cached_sectors;
 	int ret = check_bucket_ref(trans, k, ptr, sectors, ptr_data_type,
-				   bucket_gen, *bucket_data_type,
-				   *dirty_sectors, *cached_sectors);
+				   bucket_gen, *bucket_data_type, *dst_sectors);
 
 	if (ret)
 		return ret;
@@ -1559,7 +1566,7 @@ static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans,
 
 	ret = check_bucket_ref(trans, s.s_c, ptr, sectors, data_type,
 			       a->v.gen, a->v.data_type,
-			       a->v.dirty_sectors, a->v.cached_sectors);
+			       a->v.dirty_sectors);
 	if (ret)
 		goto err;
@@ -2073,8 +2080,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 	bucket_gens->first_bucket = ca->mi.first_bucket;
 	bucket_gens->nbuckets	= nbuckets;
 
-	bch2_copygc_stop(c);
-
 	if (resize) {
 		down_write(&c->gc_lock);
 		down_write(&ca->bucket_lock);
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 5574b62e0..bc0886730 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -203,6 +203,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca)
 }
 
 void bch2_dev_usage_init(struct bch_dev *);
+void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *);
 
 static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark)
 {
diff --git a/libbcachefs/buckets_types.h b/libbcachefs/buckets_types.h
index 2a9dab900..783f71017 100644
--- a/libbcachefs/buckets_types.h
+++ b/libbcachefs/buckets_types.h
@@ -33,8 +33,6 @@ struct bucket_gens {
 };
 
 struct bch_dev_usage {
-	u64			buckets_ec;
-
 	struct {
 		u64		buckets;
 		u64		sectors; /* _compressed_ sectors: */
diff --git a/libbcachefs/chardev.c b/libbcachefs/chardev.c
index de3d82de9..118f0c0c4 100644
--- a/libbcachefs/chardev.c
+++ b/libbcachefs/chardev.c
@@ -23,6 +23,12 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
+__must_check
+static int copy_to_user_errcode(void __user *to, const void *from, unsigned long n)
+{
+	return copy_to_user(to, from, n) ? -EFAULT : 0;
+}
+
 /* returns with ref on ca->ref */
 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
 					  unsigned flags)
@@ -149,10 +155,8 @@ static long bch2_global_ioctl(unsigned cmd, void __user *arg)
 static long bch2_ioctl_query_uuid(struct bch_fs *c,
 			struct bch_ioctl_query_uuid __user *user_arg)
 {
-	if (copy_to_user(&user_arg->uuid, &c->sb.user_uuid,
-			 sizeof(c->sb.user_uuid)))
-		return -EFAULT;
-	return 0;
+	return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
+				    sizeof(c->sb.user_uuid));
 }
 
 #if 0
@@ -341,10 +345,7 @@ static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
 	if (len < sizeof(e))
 		return -EINVAL;
 
-	if (copy_to_user(buf, &e, sizeof(e)))
-		return -EFAULT;
-
-	return sizeof(e);
+	return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
 }
 
 static const struct file_operations bcachefs_data_ops = {
@@ -474,14 +475,15 @@ static long bch2_ioctl_fs_usage(struct bch_fs *c,
 	if (ret)
 		goto err;
 
-	if (copy_to_user(user_arg, arg,
-			 sizeof(*arg) + arg->replica_entries_bytes))
-		ret = -EFAULT;
+
+	ret = copy_to_user_errcode(user_arg, arg,
+			sizeof(*arg) + arg->replica_entries_bytes);
 err:
 	kfree(arg);
 	return ret;
 }
 
+/* obsolete, didn't allow for new data types: */
 static long bch2_ioctl_dev_usage(struct bch_fs *c,
 				 struct bch_ioctl_dev_usage __user *user_arg)
 {
@@ -511,7 +513,6 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
 	arg.state		= ca->mi.state;
 	arg.bucket_size		= ca->mi.bucket_size;
 	arg.nr_buckets		= ca->mi.nbuckets - ca->mi.first_bucket;
-	arg.buckets_ec		= src.buckets_ec;
 
 	for (i = 0; i < BCH_DATA_NR; i++) {
 		arg.d[i].buckets	= src.d[i].buckets;
@@ -521,10 +522,58 @@ static long bch2_ioctl_dev_usage(struct bch_fs *c,
 
 	percpu_ref_put(&ca->ref);
 
-	if (copy_to_user(user_arg, &arg, sizeof(arg)))
+	return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+}
+
+static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
+				struct bch_ioctl_dev_usage_v2 __user *user_arg)
+{
+	struct bch_ioctl_dev_usage_v2 arg;
+	struct bch_dev_usage src;
+	struct bch_dev *ca;
+	int ret = 0;
+
+	if (!test_bit(BCH_FS_STARTED, &c->flags))
+		return -EINVAL;
+
+	if (copy_from_user(&arg, user_arg, sizeof(arg)))
 		return -EFAULT;
 
-	return 0;
+	if ((arg.flags & ~BCH_BY_INDEX) ||
+	    arg.pad[0] ||
+	    arg.pad[1] ||
+	    arg.pad[2])
+		return -EINVAL;
+
+	ca = bch2_device_lookup(c, arg.dev, arg.flags);
+	if (IS_ERR(ca))
+		return PTR_ERR(ca);
+
+	src = bch2_dev_usage_read(ca);
+
+	arg.state		= ca->mi.state;
+	arg.bucket_size		= ca->mi.bucket_size;
+	arg.nr_data_types	= min(arg.nr_data_types, BCH_DATA_NR);
+	arg.nr_buckets		= ca->mi.nbuckets - ca->mi.first_bucket;
+
+	ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
+	if (ret)
+		goto err;
+
+	for (unsigned i = 0; i < arg.nr_data_types; i++) {
+		struct bch_ioctl_dev_usage_type t = {
+			.buckets	= src.d[i].buckets,
+			.sectors	= src.d[i].sectors,
+			.fragmented	= src.d[i].fragmented,
+		};
+
+		ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
+		if (ret)
+			goto err;
+	}
+err:
+	percpu_ref_put(&ca->ref);
+	return ret;
 }
 
 static long bch2_ioctl_read_super(struct bch_fs *c,
@@ -561,9 +610,8 @@ static long bch2_ioctl_read_super(struct bch_fs *c,
 		goto err;
 	}
 
-	if (copy_to_user((void __user *)(unsigned long)arg.sb, sb,
-			 vstruct_bytes(sb)))
-		ret = -EFAULT;
+	ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
+				   vstruct_bytes(sb));
 err:
 	if (!IS_ERR_OR_NULL(ca))
 		percpu_ref_put(&ca->ref);
@@ -663,6 +711,8 @@ long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
 		return bch2_ioctl_fs_usage(c, arg);
 	case BCH_IOCTL_DEV_USAGE:
 		return bch2_ioctl_dev_usage(c, arg);
+	case BCH_IOCTL_DEV_USAGE_V2:
+		return bch2_ioctl_dev_usage_v2(c, arg);
 #if 0
 	case BCH_IOCTL_START:
 		BCH_IOCTL(start, struct bch_ioctl_start);
diff --git a/libbcachefs/compress.c b/libbcachefs/compress.c
index a8b148ec2..64bdafe31 100644
--- a/libbcachefs/compress.c
+++ b/libbcachefs/compress.c
@@ -354,8 +354,7 @@ static int attempt_compress(struct bch_fs *c,
 		 */
 		unsigned level = min((compression.level * 3) / 2, zstd_max_clevel());
 		ZSTD_parameters params = zstd_get_params(level, c->opts.encoded_extent_max);
-		ZSTD_CCtx *ctx = zstd_init_cctx(workspace,
-			zstd_cctx_workspace_bound(&params.cParams));
+		ZSTD_CCtx *ctx = zstd_init_cctx(workspace, c->zstd_workspace_size);
 
 		/*
 		 * ZSTD requires that when we decompress we pass in the exact
@@ -371,7 +370,7 @@ static int attempt_compress(struct bch_fs *c,
 		size_t len = zstd_compress_cctx(ctx,
 				dst + 4,	dst_len - 4 - 7,
 				src,		src_len,
-				&c->zstd_params);
+				&params);
 
 		if (zstd_is_error(len))
 			return 0;
@@ -572,6 +571,13 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 	size_t decompress_workspace_size = 0;
 	ZSTD_parameters params = zstd_get_params(zstd_max_clevel(),
 						 c->opts.encoded_extent_max);
+
+	/*
+	 * ZSTD is lying: if we allocate the size of the workspace it says it
+	 * requires, it returns memory allocation errors
+	 */
+	c->zstd_workspace_size = zstd_cctx_workspace_bound(&params.cParams) * 2;
+
 	struct {
 		unsigned	feature;
 		enum bch_compression_type	type;
@@ -585,13 +591,11 @@ static int __bch2_fs_compress_init(struct bch_fs *c, u64 features)
 			zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL),
 			zlib_inflate_workspacesize(), },
 		{ BCH_FEATURE_zstd, BCH_COMPRESSION_TYPE_zstd,
-			zstd_cctx_workspace_bound(&params.cParams),
+			c->zstd_workspace_size,
 			zstd_dctx_workspace_bound() },
 	}, *i;
 	bool have_compressed = false;
 
-	c->zstd_params = params;
-
 	for (i = compression_types;
 	     i < compression_types + ARRAY_SIZE(compression_types);
 	     i++)
diff --git a/libbcachefs/data_update.c b/libbcachefs/data_update.c
index 55769d77e..31090796c 100644
--- a/libbcachefs/data_update.c
+++ b/libbcachefs/data_update.c
@@ -267,6 +267,20 @@ static int __bch2_data_update_index_update(struct btree_trans *trans,
 			goto out;
 		}
 
+		if (trace_data_update_enabled()) {
+			struct printbuf buf = PRINTBUF;
+
+			prt_str(&buf, "\nold: ");
+			bch2_bkey_val_to_text(&buf, c, old);
+			prt_str(&buf, "\nk:   ");
+			bch2_bkey_val_to_text(&buf, c, k);
+			prt_str(&buf, "\nnew: ");
+			bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert));
+
+			trace_data_update(c, buf.buf);
+			printbuf_exit(&buf);
+		}
+
 		ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
 					k.k->p, bkey_start_pos(&insert->k)) ?:
 			bch2_insert_snapshot_whiteouts(trans, m->btree_id,
@@ -356,7 +370,7 @@ void bch2_data_update_exit(struct data_update *update)
 	bch2_bio_free_pages_pool(c, &update->op.wbio.bio);
 }
 
-void bch2_update_unwritten_extent(struct btree_trans *trans,
+static void bch2_update_unwritten_extent(struct btree_trans *trans,
 				  struct data_update *update)
 {
 	struct bch_fs *c = update->op.c;
@@ -436,7 +450,51 @@ void bch2_update_unwritten_extent(struct btree_trans *trans,
 	}
 }
 
+int bch2_extent_drop_ptrs(struct btree_trans *trans,
+			  struct btree_iter *iter,
+			  struct bkey_s_c k,
+			  struct data_update_opts data_opts)
+{
+	struct bch_fs *c = trans->c;
+	struct bkey_i *n;
+	int ret;
+
+	n = bch2_bkey_make_mut_noupdate(trans, k);
+	ret = PTR_ERR_OR_ZERO(n);
+	if (ret)
+		return ret;
+
+	while (data_opts.kill_ptrs) {
+		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
+		struct bch_extent_ptr *ptr;
+
+		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
+		data_opts.kill_ptrs ^= 1U << drop;
+	}
+
+	/*
+	 * If the new extent no longer has any pointers, bch2_extent_normalize()
+	 * will do the appropriate thing with it (turning it into a
+	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
+	 */
+	bch2_extent_normalize(c, bkey_i_to_s(n));
+
+	/*
+	 * Since we're not inserting through an extent iterator
+	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
+	 * we aren't using the extent overwrite path to delete, we're
+	 * just using the normal key deletion path:
+	 */
+	if (bkey_deleted(&n->k))
+		n->k.size = 0;
+
+	return bch2_trans_relock(trans) ?:
+		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
+		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
+}
+
 int bch2_data_update_init(struct btree_trans *trans,
+			  struct btree_iter *iter,
 			  struct moving_context *ctxt,
 			  struct data_update *m,
 			  struct write_point_specifier wp,
@@ -452,7 +510,7 @@ int bch2_data_update_init(struct btree_trans *trans,
 	const struct bch_extent_ptr *ptr;
 	unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas;
 	unsigned ptrs_locked = 0;
-	int ret;
+	int ret = 0;
 
 	bch2_bkey_buf_init(&m->k);
 	bch2_bkey_buf_reassemble(&m->k, c, k);
@@ -478,6 +536,8 @@ int bch2_data_update_init(struct btree_trans *trans,
 	bkey_for_each_ptr(ptrs, ptr)
 		percpu_ref_get(&bch_dev_bkey_exists(c, ptr->dev)->ref);
 
+	unsigned durability_have = 0, durability_removing = 0;
+
 	i = 0;
 	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
 		bool locked;
@@ -489,8 +549,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 			reserve_sectors += k.k->size;
 
 			m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p);
-		} else if (!p.ptr.cached) {
+			durability_removing += bch2_extent_ptr_desired_durability(c, &p);
+		} else if (!p.ptr.cached &&
+			   !((1U << i) & m->data_opts.kill_ptrs)) {
 			bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev);
+			durability_have += bch2_extent_ptr_durability(c, &p);
 		}
 
 		/*
@@ -529,6 +592,29 @@ int bch2_data_update_init(struct btree_trans *trans,
 		i++;
 	}
 
+	/*
+	 * If current extent durability is less than io_opts.data_replicas,
+	 * we're not trying to rereplicate the extent up to data_replicas here -
+	 * unless extra_replicas was specified
+	 *
+	 * Increasing replication is an explicit operation triggered by
+	 * rereplicate, currently, so that users don't get an unexpected -ENOSPC
+	 */
+	if (durability_have >= io_opts.data_replicas) {
+		m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs;
+		m->data_opts.rewrite_ptrs = 0;
+		/* if iter == NULL, it's just a promote */
+		if (iter)
+			ret = bch2_extent_drop_ptrs(trans, iter, k, data_opts);
+		goto done;
+	}
+
+	m->op.nr_replicas = min(durability_removing, io_opts.data_replicas - durability_have) +
+		m->data_opts.extra_replicas;
+	m->op.nr_replicas_required = m->op.nr_replicas;
+
+	BUG_ON(!m->op.nr_replicas);
+
 	if (reserve_sectors) {
 		ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors,
 				m->data_opts.extra_replicas
@@ -538,14 +624,11 @@ int bch2_data_update_init(struct btree_trans *trans,
 			goto err;
 	}
 
-	m->op.nr_replicas += m->data_opts.extra_replicas;
-	m->op.nr_replicas_required = m->op.nr_replicas;
-
-	BUG_ON(!m->op.nr_replicas);
+	if (bkey_extent_is_unwritten(k)) {
+		bch2_update_unwritten_extent(trans, m);
+		goto done;
+	}
 
-	/* Special handling required: */
-	if (bkey_extent_is_unwritten(k))
-		return -BCH_ERR_unwritten_extent_update;
 	return 0;
 err:
 	i = 0;
@@ -560,6 +643,9 @@ int bch2_data_update_init(struct btree_trans *trans,
 	bch2_bkey_buf_exit(&m->k, c);
 	bch2_bio_free_pages_pool(c, &m->op.wbio.bio);
 	return ret;
+done:
+	bch2_data_update_exit(m);
+	return ret ?: -BCH_ERR_data_update_done;
 }
 
 void bch2_data_update_opts_normalize(struct bkey_s_c k, struct data_update_opts *opts)
diff --git a/libbcachefs/data_update.h b/libbcachefs/data_update.h
index 9dc17b9d8..991095bbd 100644
--- a/libbcachefs/data_update.h
+++ b/libbcachefs/data_update.h
@@ -32,9 +32,14 @@ int bch2_data_update_index_update(struct bch_write_op *);
 void bch2_data_update_read_done(struct data_update *,
 				struct bch_extent_crc_unpacked);
 
+int bch2_extent_drop_ptrs(struct btree_trans *,
+			  struct btree_iter *,
+			  struct bkey_s_c,
+			  struct data_update_opts);
+
 void bch2_data_update_exit(struct data_update *);
-void bch2_update_unwritten_extent(struct btree_trans *, struct data_update *);
-int bch2_data_update_init(struct btree_trans *, struct moving_context *,
+int bch2_data_update_init(struct btree_trans *, struct btree_iter *,
+			  struct moving_context *,
 			  struct data_update *,
 			  struct write_point_specifier,
 			  struct bch_io_opts,
 			  struct data_update_opts,
diff --git a/libbcachefs/errcode.h b/libbcachefs/errcode.h
index e42b45293..4d35e5c6c 100644
--- a/libbcachefs/errcode.h
+++ b/libbcachefs/errcode.h
@@ -160,7 +160,7 @@
	x(BCH_ERR_fsck,			fsck_repair_unimplemented)		\
	x(BCH_ERR_fsck,			fsck_repair_impossible)			\
	x(0,				restart_recovery)			\
-	x(0,				unwritten_extent_update)		\
+	x(0,				data_update_done)			\
	x(EINVAL,			device_state_not_allowed)		\
	x(EINVAL,			member_info_missing)			\
	x(EINVAL,			mismatched_block_size)			\
@@ -208,6 +208,7 @@
	x(BCH_ERR_invalid_sb,		invalid_sb_members)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_disk_groups)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_replicas)			\
+	x(BCH_ERR_invalid_sb,		invalid_replicas_entry)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_journal)			\
	x(BCH_ERR_invalid_sb,		invalid_sb_journal_seq_blacklist)	\
	x(BCH_ERR_invalid_sb,		invalid_sb_crypt)			\
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index a864de231..f6c92df55 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -649,37 +649,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 	return replicas;
 }
 
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
-
 	if (p->ptr.cached)
 		return 0;
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return p->has_ec
+		? p->ec.redundancy + 1
+		: ca->mi.durability;
 }
 
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
-	if (p->ptr.cached)
-		return 0;
+	return __extent_ptr_durability(ca, p);
+}
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
 	if (ca->mi.state == BCH_MEMBER_STATE_failed)
 		return 0;
 
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return __extent_ptr_durability(ca, p);
 }
 
 unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
diff --git a/libbcachefs/io_read.c b/libbcachefs/io_read.c
index b833409c7..3281c4dd1 100644
--- a/libbcachefs/io_read.c
+++ b/libbcachefs/io_read.c
@@ -209,7 +209,7 @@ static struct promote_op *__promote_alloc(struct btree_trans *trans,
 	bio = &op->write.op.wbio.bio;
 	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);
 
-	ret = bch2_data_update_init(trans, NULL, &op->write,
+	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
 			writepoint_hashed((unsigned long) current),
 			opts,
 			(struct data_update_opts) {
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 4ec5d5d38..c2a655235 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -548,6 +548,7 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
 	struct jset_entry_data_usage *u =
 		container_of(entry, struct jset_entry_data_usage, entry);
 	unsigned bytes = jset_u64s(le16_to_cpu(entry->u64s)) * sizeof(u64);
+	struct printbuf err = PRINTBUF;
 	int ret = 0;
 
 	if (journal_entry_err_on(bytes < sizeof(*u) ||
@@ -556,10 +557,19 @@ static int journal_entry_data_usage_validate(struct bch_fs *c,
 				 journal_entry_data_usage_bad_size,
 				 "invalid journal entry usage: bad size")) {
 		journal_entry_null_range(entry, vstruct_next(entry));
-		return ret;
+		goto out;
 	}
 
+	if (journal_entry_err_on(bch2_replicas_entry_validate(&u->r, c->disk_sb.sb, &err),
+				 c, version, jset, entry,
+				 journal_entry_data_usage_bad_size,
+				 "invalid journal entry usage: %s", err.buf)) {
+		journal_entry_null_range(entry, vstruct_next(entry));
+		goto out;
+	}
+out:
 fsck_err:
+	printbuf_exit(&err);
 	return ret;
 }
@@ -676,8 +686,6 @@ static void journal_entry_dev_usage_to_text(struct printbuf *out, struct bch_fs
 			   le64_to_cpu(u->d[i].sectors),
 			   le64_to_cpu(u->d[i].fragmented));
 	}
-
-	prt_printf(out, " buckets_ec: %llu", le64_to_cpu(u->buckets_ec));
 }
 
 static int journal_entry_log_validate(struct bch_fs *c,
diff --git a/libbcachefs/lru.c b/libbcachefs/lru.c
index e6d081c05..5340f2d0e 100644
--- a/libbcachefs/lru.c
+++ b/libbcachefs/lru.c
@@ -40,8 +40,8 @@ void bch2_lru_pos_to_text(struct printbuf *out, struct bpos lru)
 		   u64_to_bucket(lru.offset).offset);
 }
 
-static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
-			  u64 dev_bucket, u64 time, bool set)
+static inline int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
+				 u64 dev_bucket, u64 time, bool set)
 {
 	return time
 		? bch2_btree_bit_mod(trans, BTREE_ID_lru,
@@ -51,12 +51,12 @@ static int __bch2_lru_set(struct btree_trans *trans, u16 lru_id,
 
 int bch2_lru_del(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
 {
-	return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_deleted);
+	return __bch2_lru_set(trans, lru_id, dev_bucket, time, false);
 }
 
 int bch2_lru_set(struct btree_trans *trans, u16 lru_id, u64 dev_bucket, u64 time)
 {
-	return __bch2_lru_set(trans, lru_id, dev_bucket, time, KEY_TYPE_set);
+	return __bch2_lru_set(trans, lru_id, dev_bucket, time, true);
 }
 
 int bch2_lru_change(struct btree_trans *trans,
@@ -66,8 +66,8 @@ int bch2_lru_change(struct btree_trans *trans,
 	if (old_time == new_time)
 		return 0;
 
-	return  bch2_lru_del(trans, lru_id, dev_bucket, old_time) ?:
-		bch2_lru_set(trans, lru_id, dev_bucket, new_time);
+	return  __bch2_lru_set(trans, lru_id, dev_bucket, old_time, false) ?:
+		__bch2_lru_set(trans, lru_id, dev_bucket, new_time, true);
 }
 
 static const char * const bch2_lru_types[] = {
@@ -77,10 +77,11 @@ static const char * const bch2_lru_types[] = {
 	NULL
 };
 
-static int bch2_check_lru_key(struct btree_trans *trans,
-			      struct btree_iter *lru_iter,
-			      struct bkey_s_c lru_k,
-			      struct bpos *last_flushed_pos)
+/* Returns 1 if key has been deleted */
+int bch2_check_lru_key(struct btree_trans *trans,
+		       struct btree_iter *lru_iter,
+		       struct bkey_s_c lru_k,
+		       struct bpos *last_flushed_pos)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_iter iter;
@@ -89,7 +90,6 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	const struct bch_alloc_v4 *a;
 	struct printbuf buf1 = PRINTBUF;
 	struct printbuf buf2 = PRINTBUF;
-	enum bch_lru_type type = lru_type(lru_k);
 	struct bpos alloc_pos = u64_to_bucket(lru_k.k->p.offset);
 	u64 idx;
 	int ret;
@@ -98,7 +98,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 			lru_entry_to_invalid_bucket,
 			"lru key points to nonexistent device:bucket %llu:%llu",
 			alloc_pos.inode, alloc_pos.offset))
-		return bch2_btree_delete_at(trans, lru_iter, 0);
+		goto delete;
 
 	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc, alloc_pos, 0);
 	ret = bkey_err(k);
@@ -107,6 +107,7 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 
 	a = bch2_alloc_to_v4(k, &a_convert);
 
+	enum bch_lru_type type = lru_type(lru_k);
 	switch (type) {
 	case BCH_LRU_read:
 		idx = alloc_lru_idx_read(*a);
@@ -114,6 +115,9 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	case BCH_LRU_fragmentation:
 		idx = a->fragmentation_lru;
 		break;
+	default:
+		/* unknown LRU type, don't check: */
+		goto out;
 	}
 
 	if (lru_k.k->type != KEY_TYPE_set ||
@@ -125,16 +129,18 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 		goto out;
 	}
 
-	if (c->opts.reconstruct_alloc ||
+	if ((c->opts.reconstruct_alloc &&
+	     c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_lrus) ||
 	    fsck_err(c, lru_entry_bad,
 		     "incorrect lru entry: lru %s time %llu\n"
 		     "  %s\n"
-		     "  for %s",
+		     "for\n"
+		     "  %s",
 		     bch2_lru_types[type],
 		     lru_pos_time(lru_k.k->p),
 		     (bch2_bkey_val_to_text(&buf1, c, lru_k), buf1.buf),
 		     (bch2_bkey_val_to_text(&buf2, c, k), buf2.buf)))
-		ret = bch2_btree_delete_at(trans, lru_iter, 0);
+		goto delete;
 }
 out:
 err:
@@ -143,6 +149,14 @@ static int bch2_check_lru_key(struct btree_trans *trans,
 	printbuf_exit(&buf2);
 	printbuf_exit(&buf1);
 	return ret;
+delete:
+	ret =   bch2_btree_delete_at(trans, lru_iter, 0) ?:
+		bch2_trans_commit(trans, NULL, NULL,
+			BCH_WATERMARK_btree|
+			BCH_TRANS_COMMIT_lazy_rw|
+			BCH_TRANS_COMMIT_no_enospc) ?:
+		1;
+	goto out;
 }
 
 int bch2_check_lrus(struct bch_fs *c)
@@ -150,15 +164,14 @@ int bch2_check_lrus(struct bch_fs *c)
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	struct bpos last_flushed_pos = POS_MIN;
-	int ret = 0;
 
-	ret = bch2_trans_run(c,
-		for_each_btree_key_commit(trans, iter,
-				BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k,
-				NULL, NULL, BCH_TRANS_COMMIT_no_enospc|BCH_TRANS_COMMIT_lazy_rw,
-			bch2_check_lru_key(trans, &iter, k, &last_flushed_pos)));
-	if (ret)
-		bch_err_fn(c, ret);
+	int ret = bch2_trans_run(c,
+		for_each_btree_key2(trans, iter,
+				BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ({
+			int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
+			ret2 < 0 ? ret2 : 0;
+		})));
+	bch_err_fn(c, ret);
 	return ret;
 }
diff --git a/libbcachefs/lru.h b/libbcachefs/lru.h
index 429dca816..014dba7cd 100644
--- a/libbcachefs/lru.h
+++ b/libbcachefs/lru.h
@@ -64,6 +64,8 @@ int bch2_lru_del(struct btree_trans *, u16, u64, u64);
 int bch2_lru_set(struct btree_trans *, u16, u64, u64);
 int bch2_lru_change(struct btree_trans *, u16, u64, u64, u64);
 
+int bch2_check_lru_key(struct btree_trans *, struct btree_iter *,
+		       struct bkey_s_c, struct bpos *);
 int bch2_check_lrus(struct bch_fs *);
 
 #endif /* _BCACHEFS_LRU_H */
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index cf36f2b07..c5518a866 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -173,6 +173,7 @@ void bch2_move_ctxt_wait_for_io(struct moving_context *ctxt)
 static void bch2_moving_ctxt_flush_all(struct moving_context *ctxt)
 {
 	move_ctxt_wait_event(ctxt, list_empty(&ctxt->reads));
+	bch2_trans_unlock_long(ctxt->trans);
 	closure_sync(&ctxt->cl);
 }
@@ -235,49 +236,6 @@ void bch2_move_stats_init(struct bch_move_stats *stats, const char *name)
 	scnprintf(stats->name, sizeof(stats->name), "%s", name);
 }
 
-static int bch2_extent_drop_ptrs(struct btree_trans *trans,
-				 struct btree_iter *iter,
-				 struct bkey_s_c k,
-				 struct data_update_opts data_opts)
-{
-	struct bch_fs *c = trans->c;
-	struct bkey_i *n;
-	int ret;
-
-	n = bch2_bkey_make_mut_noupdate(trans, k);
-	ret = PTR_ERR_OR_ZERO(n);
-	if (ret)
-		return ret;
-
-	while (data_opts.kill_ptrs) {
-		unsigned i = 0, drop = __fls(data_opts.kill_ptrs);
-		struct bch_extent_ptr *ptr;
-
-		bch2_bkey_drop_ptrs(bkey_i_to_s(n), ptr, i++ == drop);
-		data_opts.kill_ptrs ^= 1U << drop;
-	}
-
-	/*
-	 * If the new extent no longer has any pointers, bch2_extent_normalize()
-	 * will do the appropriate thing with it (turning it into a
-	 * KEY_TYPE_error key, or just a discard if it was a cached extent)
-	 */
-	bch2_extent_normalize(c, bkey_i_to_s(n));
-
-	/*
-	 * Since we're not inserting through an extent iterator
-	 * (BTREE_ITER_ALL_SNAPSHOTS iterators aren't extent iterators),
-	 * we aren't using the extent overwrite path to delete, we're
-	 * just using the normal key deletion path:
-	 */
-	if (bkey_deleted(&n->k))
-		n->k.size = 0;
-
-	return bch2_trans_relock(trans) ?:
-		bch2_trans_update(trans, iter, n, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
-}
-
 int bch2_move_extent(struct moving_context *ctxt,
 		     struct move_bucket_in_flight *bucket_in_flight,
 		     struct btree_iter *iter,
@@ -347,19 +305,11 @@ int bch2_move_extent(struct moving_context *ctxt,
 	io->rbio.bio.bi_iter.bi_sector	= bkey_start_offset(k.k);
 	io->rbio.bio.bi_end_io		= move_read_endio;
 
-	ret = bch2_data_update_init(trans, ctxt, &io->write, ctxt->wp,
+	ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp,
 				    io_opts, data_opts, iter->btree_id, k);
-	if (ret && ret != -BCH_ERR_unwritten_extent_update)
+	if (ret)
 		goto err_free_pages;
 
-	if (ret == -BCH_ERR_unwritten_extent_update) {
-		bch2_update_unwritten_extent(trans, &io->write);
-		move_free(io);
-		return 0;
-	}
-
-	BUG_ON(ret);
-
 	io->write.op.end_io = move_write_done;
 
 	if (ctxt->rate)
@@ -403,6 +353,9 @@ int bch2_move_extent(struct moving_context *ctxt,
 err_free:
 	kfree(io);
 err:
+	if (ret == -BCH_ERR_data_update_done)
+		return 0;
+
 	this_cpu_inc(c->counters[BCH_COUNTER_move_extent_alloc_mem_fail]);
 	trace_move_extent_alloc_mem_fail2(c, k);
 	return ret;
@@ -506,22 +459,13 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
 
 	do {
 		delay = ctxt->rate ? bch2_ratelimit_delay(ctxt->rate) : 0;
-
-		if (delay) {
-			if (delay > HZ / 10)
-				bch2_trans_unlock_long(ctxt->trans);
-			else
-				bch2_trans_unlock(ctxt->trans);
-			set_current_state(TASK_INTERRUPTIBLE);
-		}
-
-		if (kthread_should_stop()) {
-			__set_current_state(TASK_RUNNING);
+		if (kthread_should_stop())
 			return 1;
-		}
 
 		if (delay)
-			schedule_timeout(delay);
+			move_ctxt_wait_event_timeout(ctxt,
+					freezing(current) || kthread_should_stop(),
+					delay);
 
 		if (unlikely(freezing(current))) {
 			bch2_moving_ctxt_flush_all(ctxt);
@@ -729,7 +673,7 @@ int __bch2_evacuate_bucket(struct moving_context *ctxt,
 			}
 
 			a = bch2_alloc_to_v4(k, &a_convert);
-			dirty_sectors = a->dirty_sectors;
+			dirty_sectors = bch2_bucket_sectors_dirty(*a);
 			bucket_size = bch_dev_bkey_exists(c, bucket.inode)->mi.bucket_size;
 			fragmentation = a->fragmentation_lru;
diff --git a/libbcachefs/move.h b/libbcachefs/move.h
index cedde6ee9..531965674 100644
--- a/libbcachefs/move.h
+++ b/libbcachefs/move.h
@@ -38,6 +38,25 @@ struct moving_context {
 	wait_queue_head_t	wait;
 };
 
+#define move_ctxt_wait_event_timeout(_ctxt, _cond, _timeout)			\
+({										\
+	int _ret = 0;								\
+	while (true) {								\
+		bool cond_finished = false;					\
+		bch2_moving_ctxt_do_pending_writes(_ctxt);			\
+										\
+		if (_cond)							\
+			break;							\
+		bch2_trans_unlock_long((_ctxt)->trans);				\
+		_ret = __wait_event_timeout((_ctxt)->wait,			\
+			    bch2_moving_ctxt_next_pending_write(_ctxt) ||	\
+			    (cond_finished = (_cond)), _timeout);		\
+		if (_ret || (cond_finished))					\
+			break;							\
+	}									\
+	_ret;									\
+})
+
 #define move_ctxt_wait_event(_ctxt, _cond)				\
 do {									\
 	bool cond_finished = false;					\
diff --git a/libbcachefs/movinggc.c b/libbcachefs/movinggc.c
index e884324bd..d079ba7aa 100644
--- a/libbcachefs/movinggc.c
+++ b/libbcachefs/movinggc.c
@@ -91,7 +91,7 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
 	a = bch2_alloc_to_v4(k, &_a);
 	b->k.gen	= a->gen;
-	b->sectors	= a->dirty_sectors;
+	b->sectors	= bch2_bucket_sectors_dirty(*a);
 
 	ret = data_type_movable(a->data_type) &&
 		a->fragmentation_lru &&
@@ -149,6 +149,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
 	struct bkey_s_c k;
 	size_t nr_to_get = max_t(size_t, 16U, buckets_in_flight->nr / 4);
 	size_t saw = 0, in_flight = 0, not_movable = 0, sectors = 0;
+	struct bpos last_flushed_pos = POS_MIN;
 	int ret;
 
 	move_buckets_wait(ctxt, buckets_in_flight, false);
@@ -165,11 +166,16 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
 				  lru_pos(BCH_LRU_FRAGMENTATION_START, 0, 0),
 				  lru_pos(BCH_LRU_FRAGMENTATION_START, U64_MAX, LRU_TIME_MAX),
 				  0, k, ({
-		struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
-		int ret2 = 0;
+		int ret2 = bch2_check_lru_key(trans, &iter, k, &last_flushed_pos);
+		if (ret2) {
+			ret2 = ret2 < 0 ? ret2 : 0;
+			goto next;
+		}
 
 		saw++;
 
+		struct move_bucket b = { .k.bucket = u64_to_bucket(k.k->p.offset) };
+
 		if (!bch2_bucket_is_movable(trans, &b, lru_pos_time(k.k->p)))
 			not_movable++;
 		else if (bucket_in_flight(buckets_in_flight, b.k))
@@ -179,6 +185,7 @@ static int bch2_copygc_get_buckets(struct moving_context *ctxt,
 			if (ret2 >= 0)
 				sectors += b.sectors;
 		}
+next:
 		ret2;
 	}));
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index db2139c05..fefa4395f 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -171,6 +171,20 @@ static struct bkey_s_c next_rebalance_extent(struct btree_trans *trans,
 		return bkey_s_c_null;
 	}
 
+	if (trace_rebalance_extent_enabled()) {
+		struct printbuf buf = PRINTBUF;
+
+		prt_str(&buf, "target=");
+		bch2_target_to_text(&buf, c, r->target);
+		prt_str(&buf, " compression=");
+		prt_str(&buf, bch2_compression_opts[r->compression]);
+		prt_str(&buf, " ");
+		bch2_bkey_val_to_text(&buf, c, k);
+
+		trace_rebalance_extent(c, buf.buf);
+		printbuf_exit(&buf);
+	}
+
 	return k;
 }
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index a94f2b5ed..5f4f76e67 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -302,8 +302,6 @@ static int journal_replay_entry_early(struct bch_fs *c,
 		struct bch_dev *ca = bch_dev_bkey_exists(c, le32_to_cpu(u->dev));
 		unsigned i, nr_types = jset_entry_dev_usage_nr_types(u);
 
-		ca->usage_base->buckets_ec = le64_to_cpu(u->buckets_ec);
-
 		for (i = 0; i < min_t(unsigned, nr_types, BCH_DATA_NR); i++) {
 			ca->usage_base->d[i].buckets = le64_to_cpu(u->d[i].buckets);
 			ca->usage_base->d[i].sectors = le64_to_cpu(u->d[i].sectors);
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 820f99898..ccb776e04 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -68,6 +68,33 @@ void bch2_replicas_entry_to_text(struct printbuf *out,
 	prt_printf(out, "]");
 }
 
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
+				 struct bch_sb *sb,
+				 struct printbuf *err)
+{
+	if (!r->nr_devs) {
+		prt_printf(err, "no devices in entry ");
+		goto bad;
+	}
+
+	if (r->nr_required > 1 &&
+	    r->nr_required >= r->nr_devs) {
+		prt_printf(err, "bad nr_required in entry ");
+		goto bad;
+	}
+
+	for (unsigned i = 0; i < r->nr_devs; i++)
+		if (!bch2_dev_exists(sb, r->devs[i])) {
+			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
+			goto bad;
+		}
+
+	return 0;
+bad:
+	bch2_replicas_entry_to_text(err, r);
+	return -BCH_ERR_invalid_replicas_entry;
+}
+
 void bch2_cpu_replicas_to_text(struct printbuf *out,
 			       struct bch_replicas_cpu *r)
 {
@@ -163,7 +190,8 @@ void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
 }
 
 static struct bch_replicas_cpu
-cpu_replicas_add_entry(struct bch_replicas_cpu *old,
+cpu_replicas_add_entry(struct bch_fs *c,
+		       struct bch_replicas_cpu *old,
 		       struct bch_replicas_entry_v1 *new_entry)
 {
 	unsigned i;
@@ -173,6 +201,9 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
 			replicas_entry_bytes(new_entry)),
 	};
 
+	for (i = 0; i < new_entry->nr_devs; i++)
+		BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));
+
 	BUG_ON(!new_entry->data_type);
 	verify_replicas_entry(new_entry);
@@ -382,7 +413,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
 	if (c->replicas_gc.entries &&
 	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
-		new_gc = cpu_replicas_add_entry(&c->replicas_gc, new_entry);
+		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
 		if (!new_gc.entries) {
 			ret = -BCH_ERR_ENOMEM_cpu_replicas;
 			goto err;
@@ -390,7 +421,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 	}
 
 	if (!__replicas_has_entry(&c->replicas, new_entry)) {
-		new_r = cpu_replicas_add_entry(&c->replicas, new_entry);
+		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
 		if (!new_r.entries) {
 			ret = -BCH_ERR_ENOMEM_cpu_replicas;
 			goto err;
@@ -598,7 +629,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
 	if (idx < 0) {
 		struct bch_replicas_cpu n;
 
-		n = cpu_replicas_add_entry(&c->replicas, r);
+		n = cpu_replicas_add_entry(c, &c->replicas, r);
 		if (!n.entries)
 			return -BCH_ERR_ENOMEM_cpu_replicas;
 
@@ -797,7 +828,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 				      struct bch_sb *sb,
 				      struct printbuf *err)
 {
-	unsigned i, j;
+	unsigned i;
 
 	sort_cmp_size(cpu_r->entries,
 		      cpu_r->nr,
@@ -808,31 +839,9 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
 		struct bch_replicas_entry_v1 *e =
 			cpu_replicas_entry(cpu_r, i);
 
-		if (e->data_type >= BCH_DATA_NR) {
-			prt_printf(err, "invalid data type in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		if (!e->nr_devs) {
-			prt_printf(err, "no devices in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		if (e->nr_required > 1 &&
-		    e->nr_required >= e->nr_devs) {
-			prt_printf(err, "bad nr_required in entry ");
-			bch2_replicas_entry_to_text(err, e);
-			return -BCH_ERR_invalid_sb_replicas;
-		}
-
-		for (j = 0; j < e->nr_devs; j++)
-			if (!bch2_dev_exists(sb, e->devs[j])) {
-				prt_printf(err, "invalid device %u in entry ", e->devs[j]);
-				bch2_replicas_entry_to_text(err, e);
-				return -BCH_ERR_invalid_sb_replicas;
-			}
+		int ret = bch2_replicas_entry_validate(e, sb, err);
+		if (ret)
+			return ret;
 
 		if (i + 1 < cpu_r->nr) {
 			struct bch_replicas_entry_v1 *n =
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index b2bb12a9b..654a4b26d 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -9,6 +9,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
 void bch2_replicas_entry_to_text(struct printbuf *,
 				 struct bch_replicas_entry_v1 *);
+int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *,
+				 struct bch_sb *, struct printbuf *);
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
 
 static inline struct bch_replicas_entry_v1 *
diff --git a/libbcachefs/sb-clean.c b/libbcachefs/sb-clean.c
index fedc9e102..8dc0e3db1 100644
--- a/libbcachefs/sb-clean.c
+++ b/libbcachefs/sb-clean.c
@@ -256,7 +256,6 @@ void bch2_journal_super_entries_add_common(struct bch_fs *c,
 		u->entry.type	= BCH_JSET_ENTRY_dev_usage;
 		u->dev		= cpu_to_le32(dev);
-		u->buckets_ec	= cpu_to_le64(ca->usage_base->buckets_ec);
 
 		for (i = 0; i < BCH_DATA_NR; i++) {
 			u->d[i].buckets = cpu_to_le64(ca->usage_base->d[i].buckets);
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index bed0f857f..259af07f4 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -259,6 +259,11 @@ static void member_to_text(struct printbuf *out,
 		prt_printf(out, "(none)");
 	prt_newline(out);
 
+	prt_str(out, "Durability:");
+	prt_tab(out);
+	prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m));
+	prt_newline(out);
+
 	prt_printf(out, "Discard:");
 	prt_tab(out);
 	prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index fa8534782..512d56657 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -658,7 +658,7 @@ static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf
 	return 0;
 }
 
-int __bch2_read_super(const char *path, struct bch_opts *opts,
+static int __bch2_read_super(const char *path, struct bch_opts *opts,
 		      struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
 {
 	u64 offset = opt_get(*opts, sb);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 91f757173..552d55dd9 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -270,6 +270,8 @@ void bch2_fs_read_only(struct bch_fs *c)
 
 	BUG_ON(test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
+	bch_verbose(c, "going read-only");
+
 	/*
 	 * Block new foreground-end write operations from starting - any new
 	 * writes will return -EROFS:
@@ -297,13 +299,21 @@ void bch2_fs_read_only(struct bch_fs *c)
 	     test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags) ||
 	     test_bit(BCH_FS_EMERGENCY_RO, &c->flags));
 
+	bool writes_disabled = test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
+	if (writes_disabled)
+		bch_verbose(c, "finished waiting for writes to stop");
+
 	__bch2_fs_read_only(c);
 
 	wait_event(bch2_read_only_wait,
 		   test_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags));
 
+	if (!writes_disabled)
+		bch_verbose(c, "finished waiting for writes to stop");
+
 	clear_bit(BCH_FS_WRITE_DISABLE_COMPLETE, &c->flags);
 	clear_bit(BCH_FS_GOING_RO, &c->flags);
+	clear_bit(BCH_FS_RW, &c->flags);
 
 	if (!bch2_journal_error(&c->journal) &&
 	    !test_bit(BCH_FS_ERROR, &c->flags) &&
@@ -319,9 +329,9 @@ void bch2_fs_read_only(struct bch_fs *c)
 
 		bch_verbose(c, "marking filesystem clean");
 		bch2_fs_mark_clean(c);
+	} else {
+		bch_verbose(c, "done going read-only, filesystem not clean");
 	}
-
-	clear_bit(BCH_FS_RW, &c->flags);
 }
 
 static void bch2_fs_read_only_work(struct work_struct *work)
@@ -424,6 +434,18 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 		bch2_dev_allocator_add(c, ca);
 	bch2_recalc_capacity(c);
 
+	set_bit(BCH_FS_RW, &c->flags);
+	set_bit(BCH_FS_WAS_RW, &c->flags);
+
+#ifndef BCH_WRITE_REF_DEBUG
+	percpu_ref_reinit(&c->writes);
+#else
+	for (i = 0; i < BCH_WRITE_REF_NR; i++) {
+		BUG_ON(atomic_long_read(&c->writes[i]));
+		atomic_long_inc(&c->writes[i]);
+	}
+#endif
+
 	ret = bch2_gc_thread_start(c);
 	if (ret) {
 		bch_err(c, "error starting gc thread");
@@ -440,24 +462,16 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
 		goto err;
 	}
 
-#ifndef BCH_WRITE_REF_DEBUG
-	percpu_ref_reinit(&c->writes);
-#else
-	for (i = 0; i < BCH_WRITE_REF_NR; i++) {
-		BUG_ON(atomic_long_read(&c->writes[i]));
-		atomic_long_inc(&c->writes[i]);
-	}
-#endif
-	set_bit(BCH_FS_RW, &c->flags);
-	set_bit(BCH_FS_WAS_RW, &c->flags);
-
 	bch2_do_discards(c);
 	bch2_do_invalidates(c);
 	bch2_do_stripe_deletes(c);
 	bch2_do_pending_node_rewrites(c);
 	return 0;
 err:
-	__bch2_fs_read_only(c);
+	if (test_bit(BCH_FS_RW, &c->flags))
+		bch2_fs_read_only(c);
+	else
+		__bch2_fs_read_only(c);
 	return ret;
 }
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index 264c46b45..7223418d3 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -258,15 +258,16 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 	struct btree_iter iter;
 	struct bkey_s_c k;
 	enum btree_id id;
-	u64 nr_uncompressed_extents = 0,
-	    nr_compressed_extents = 0,
-	    nr_incompressible_extents = 0,
-	    uncompressed_sectors = 0,
-	    incompressible_sectors = 0,
-	    compressed_sectors_compressed = 0,
-	    compressed_sectors_uncompressed = 0;
+	struct compression_type_stats {
+		u64		nr_extents;
+		u64		sectors_compressed;
+		u64		sectors_uncompressed;
+	} s[BCH_COMPRESSION_TYPE_NR];
+	u64 compressed_incompressible = 0;
 	int ret = 0;
 
+	memset(s, 0, sizeof(s));
+
 	if (!test_bit(BCH_FS_STARTED, &c->flags))
 		return -EPERM;
@@ -279,36 +280,30 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 		for_each_btree_key(trans, iter, id, POS_MIN,
 				   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
 			struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+			struct bch_extent_crc_unpacked crc;
 			const union bch_extent_entry *entry;
-			struct extent_ptr_decoded p;
-			bool compressed = false, uncompressed = false, incompressible = false;
-
-			bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
-				switch (p.crc.compression_type) {
-				case BCH_COMPRESSION_TYPE_none:
-					uncompressed = true;
-					uncompressed_sectors += k.k->size;
-					break;
-				case BCH_COMPRESSION_TYPE_incompressible:
-					incompressible = true;
-					incompressible_sectors += k.k->size;
-					break;
-				default:
-					compressed_sectors_compressed +=
-						p.crc.compressed_size;
-					compressed_sectors_uncompressed +=
-						p.crc.uncompressed_size;
-					compressed = true;
-					break;
+			bool compressed = false, incompressible = false;
+
+			bkey_for_each_crc(k.k, ptrs, crc, entry) {
+				incompressible	|= crc.compression_type == BCH_COMPRESSION_TYPE_incompressible;
+				compressed	|= crc_is_compressed(crc);
+
+				if (crc_is_compressed(crc)) {
+					s[crc.compression_type].nr_extents++;
+					s[crc.compression_type].sectors_compressed += crc.compressed_size;
+					s[crc.compression_type].sectors_uncompressed += crc.uncompressed_size;
 				}
 			}
 
-			if (incompressible)
-				nr_incompressible_extents++;
-			else if (uncompressed)
-				nr_uncompressed_extents++;
-			else if (compressed)
-				nr_compressed_extents++;
+			compressed_incompressible += compressed && incompressible;
+
+			if (!compressed) {
+				unsigned t = incompressible ? BCH_COMPRESSION_TYPE_incompressible : 0;
+
+				s[t].nr_extents++;
+				s[t].sectors_compressed += k.k->size;
+				s[t].sectors_uncompressed += k.k->size;
+			}
 		}
 		bch2_trans_iter_exit(trans, &iter);
 	}
@@ -318,26 +313,45 @@ static int bch2_compression_stats_to_text(struct printbuf *out, struct bch_fs *c
 	if (ret)
 		return ret;
 
-	prt_printf(out, "uncompressed:\n");
-	prt_printf(out, "	nr extents:		%llu\n", nr_uncompressed_extents);
-	prt_printf(out, "	size:			");
-	prt_human_readable_u64(out, uncompressed_sectors << 9);
-	prt_printf(out, "\n");
+	prt_str(out, "type");
+	printbuf_tabstop_push(out, 12);
+	prt_tab(out);
 
-	prt_printf(out, "compressed:\n");
-	prt_printf(out, "	nr extents:		%llu\n", nr_compressed_extents);
-	prt_printf(out, "	compressed size:	");
-	prt_human_readable_u64(out, compressed_sectors_compressed << 9);
-	prt_printf(out, "\n");
-	prt_printf(out, "	uncompressed size:	");
-	prt_human_readable_u64(out, compressed_sectors_uncompressed << 9);
-	prt_printf(out, "\n");
+	prt_str(out, "compressed");
+	printbuf_tabstop_push(out, 16);
+	prt_tab_rjust(out);
+
+	prt_str(out, "uncompressed");
+	printbuf_tabstop_push(out, 16);
+	prt_tab_rjust(out);
+
+	prt_str(out, "average extent size");
+	printbuf_tabstop_push(out, 24);
+	prt_tab_rjust(out);
+	prt_newline(out);
+
+	for (unsigned i = 0; i < ARRAY_SIZE(s); i++) {
+		prt_str(out, bch2_compression_types[i]);
+		prt_tab(out);
+
+		prt_human_readable_u64(out, s[i].sectors_compressed << 9);
+		prt_tab_rjust(out);
+
+		prt_human_readable_u64(out, s[i].sectors_uncompressed << 9);
+		prt_tab_rjust(out);
+
+		prt_human_readable_u64(out, s[i].nr_extents
+				       ? div_u64(s[i].sectors_uncompressed << 9, s[i].nr_extents)
+				       : 0);
+		prt_tab_rjust(out);
+		prt_newline(out);
+	}
+
+	if (compressed_incompressible) {
+		prt_printf(out, "%llu compressed & incompressible extents", compressed_incompressible);
+		prt_newline(out);
+	}
 
-	prt_printf(out, "incompressible:\n");
-	prt_printf(out, "	nr extents:		%llu\n", nr_incompressible_extents);
-	prt_printf(out, "	size:			");
-	prt_human_readable_u64(out, incompressible_sectors << 9);
-	prt_printf(out, "\n");
 	return 0;
 }
@@ -786,32 +800,7 @@ static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
 	printbuf_tabstop_push(out, 16);
 	printbuf_tabstop_push(out, 16);
 
-	prt_tab(out);
-	prt_str(out, "buckets");
-	prt_tab_rjust(out);
-	prt_str(out, "sectors");
-	prt_tab_rjust(out);
-	prt_str(out, "fragmented");
-	prt_tab_rjust(out);
-	prt_newline(out);
-
-	for (i = 0; i < BCH_DATA_NR; i++) {
-		prt_str(out, bch2_data_types[i]);
-		prt_tab(out);
-		prt_u64(out, stats.d[i].buckets);
-		prt_tab_rjust(out);
-		prt_u64(out, stats.d[i].sectors);
-		prt_tab_rjust(out);
-		prt_u64(out, stats.d[i].fragmented);
-		prt_tab_rjust(out);
-		prt_newline(out);
-	}
-
-	prt_str(out, "ec");
-	prt_tab(out);
-	prt_u64(out, stats.buckets_ec);
-	prt_tab_rjust(out);
-	prt_newline(out);
+	bch2_dev_usage_to_text(out, &stats);
 
 	prt_newline(out);
diff --git a/libbcachefs/trace.h b/libbcachefs/trace.h
index 7b24e7fe3..4980cfdd1 100644
--- a/libbcachefs/trace.h
+++ b/libbcachefs/trace.h
@@ -32,19 +32,21 @@ DECLARE_EVENT_CLASS(bpos,
 	TP_printk("%llu:%llu:%u", __entry->p_inode, __entry->p_offset, __entry->p_snapshot)
 );
 
-DECLARE_EVENT_CLASS(bkey,
-	TP_PROTO(struct bch_fs *c, const char *k),
-	TP_ARGS(c, k),
+DECLARE_EVENT_CLASS(str,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str),
 
 	TP_STRUCT__entry(
-		__string(k,	k				)
+		__field(dev_t,		dev			)
+		__string(str,		str			)
 	),
 
 	TP_fast_assign(
-		__assign_str(k, k);
+		__entry->dev		= c->dev;
+		__assign_str(str, str);
 	),
 
-	TP_printk("%s", __get_str(k))
+	TP_printk("%d,%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(str))
 );
 
 DECLARE_EVENT_CLASS(btree_node,
@@ -736,22 +738,22 @@ TRACE_EVENT(bucket_evacuate,
 		  __entry->dev_idx, __entry->bucket)
 );
 
-DEFINE_EVENT(bkey, move_extent,
+DEFINE_EVENT(str, move_extent,
 	TP_PROTO(struct bch_fs *c, const char *k),
 	TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_read,
+DEFINE_EVENT(str, move_extent_read,
 	TP_PROTO(struct bch_fs *c, const char *k),
 	TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_write,
+DEFINE_EVENT(str, move_extent_write,
 	TP_PROTO(struct bch_fs *c, const char *k),
 	TP_ARGS(c, k)
 );
 
-DEFINE_EVENT(bkey, move_extent_finish,
+DEFINE_EVENT(str, move_extent_finish,
 	TP_PROTO(struct bch_fs *c, const char *k),
 	TP_ARGS(c, k)
 );
@@ -773,7 +775,7 @@ TRACE_EVENT(move_extent_fail,
 	TP_printk("%d:%d %s", MAJOR(__entry->dev), MINOR(__entry->dev), __get_str(msg))
 );
 
-DEFINE_EVENT(bkey, move_extent_alloc_mem_fail,
+DEFINE_EVENT(str, move_extent_alloc_mem_fail,
 	TP_PROTO(struct bch_fs *c, const char *k),
 	TP_ARGS(c, k)
 );
@@ -1366,6 +1368,16 @@ TRACE_EVENT(write_buffer_flush_slowpath,
 	TP_printk("%zu/%zu", __entry->slowpath, __entry->total)
 );
 
+DEFINE_EVENT(str, rebalance_extent,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
+);
+
+DEFINE_EVENT(str, data_update,
+	TP_PROTO(struct bch_fs *c, const char *str),
+	TP_ARGS(c, str)
+);
+
 #endif /* _TRACE_BCACHEFS_H */
 
 /* This part must be outside protection */
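
A note on the new device-usage interface added above: the v1 struct bch_ioctl_dev_usage has a fixed d[10] array, so it cannot report data types added after that layout was frozen. With v2, userspace states how many struct bch_ioctl_dev_usage_type slots it allocated in nr_data_types; the kernel clamps that to BCH_DATA_NR, copies the header back, then copies one entry per slot. What follows is a minimal userspace sketch, not part of the patch: the print_dev_usage_v2() helper name, the include path, and the already-opened chardev fd fs_fd are assumptions; the structures and flags are the ones defined in bcachefs_ioctl.h above.

/*
 * Illustrative sketch only -- not part of the patch. Assumes the UAPI
 * definitions from bcachefs_ioctl.h above; the include path and fs_fd
 * (an open fd on the filesystem's character device) are assumptions.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>

#include "libbcachefs/bcachefs_ioctl.h"	/* assumed include path */

static int print_dev_usage_v2(int fs_fd, __u64 dev_idx)
{
	unsigned nr = 16;	/* slots we allocate; kernel clamps nr_data_types to BCH_DATA_NR */
	struct bch_ioctl_dev_usage_v2 *u;
	int ret = 0;

	/* calloc() zeroes the pad[] bytes, which the kernel rejects if nonzero */
	u = calloc(1, sizeof(*u) + nr * sizeof(u->d[0]));
	if (!u)
		return -ENOMEM;

	u->dev		 = dev_idx;
	u->flags	 = BCH_BY_INDEX;	/* look the device up by index */
	u->nr_data_types = nr;

	if (ioctl(fs_fd, BCH_IOCTL_DEV_USAGE_V2, u) < 0) {
		ret = -errno;
		goto out;
	}

	/* on return, nr_data_types holds the number of entries actually filled */
	for (unsigned i = 0; i < u->nr_data_types; i++)
		printf("type %u: buckets %llu sectors %llu fragmented %llu\n", i,
		       (unsigned long long) u->d[i].buckets,
		       (unsigned long long) u->d[i].sectors,
		       (unsigned long long) u->d[i].fragmented);
out:
	free(u);
	return ret;
}

Since the kernel writes the clamped header back before the entries, a caller can detect that it over- or under-allocated by comparing nr_data_types before and after the call.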