git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
bcachefs: Allocation code refactoring
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 6 Oct 2018 08:12:42 +0000 (04:12 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:10 +0000 (17:08 -0400)
bch2_alloc_sectors_start() was a nightmare to work with - it's got some
tricky stuff to do, since it wants to use the buckets the writepoint
already has, unless they're not in the target it wants to write to,
unless it can't allocate from any other devices in which case it will
use those buckets if it has to - et cetera.

This restructures the code to start with a new empty list of open
buckets we're going to use for the new allocation, pulling buckets from
the write point's list as we decide that we really are going to use
them - making the code somewhat more functional and drastically easier
to understand.

Also fixes a bug where we could end up waiting on c->freelist_wait
(because allocating from one device failed) but return success from
bch2_bucket_alloc(), because allocating from a different device
succeeded.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
12 files changed:
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_foreground.h
fs/bcachefs/alloc_types.h
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_interior.h
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/io_types.h
fs/bcachefs/journal.c

index d22b2b72b0d1c5dd7e1fb52338377c6404f5ef9b..45e8b124a9f38ff5ed6689362ef6c8d7cef25493 100644 (file)
@@ -1101,7 +1101,7 @@ void bch2_dev_allocator_remove(struct bch_fs *c, struct bch_dev *ca)
                struct btree_alloc *a =
                        &c->btree_reserve_cache[--c->btree_reserve_cache_nr];
 
-               bch2_open_bucket_put_refs(c, &a->ob.nr, a->ob.refs);
+               bch2_open_buckets_put(c, &a->ob);
        }
        mutex_unlock(&c->btree_reserve_cache_lock);
 
index be25e01a7d00870518bac677dbdbee64c440c9bf..562c1317aa9e03bf9dcb34ece3e098aa26beac4d 100644 (file)
 #include <linux/rcupdate.h>
 
 enum bucket_alloc_ret {
-       ALLOC_SUCCESS           = 0,
-       OPEN_BUCKETS_EMPTY      = -1,
-       FREELIST_EMPTY          = -2,   /* Allocator thread not keeping up */
-       NO_DEVICES              = -3,   /* -EROFS */
+       ALLOC_SUCCESS,
+       OPEN_BUCKETS_EMPTY,
+       FREELIST_EMPTY,         /* Allocator thread not keeping up */
 };
 
 /*
@@ -129,6 +128,43 @@ static struct open_bucket *bch2_open_bucket_alloc(struct bch_fs *c)
        return ob;
 }
 
+static void open_bucket_free_unused(struct bch_fs *c,
+                                   struct write_point *wp,
+                                   struct open_bucket *ob)
+{
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+       BUG_ON(ca->open_buckets_partial_nr >=
+              ARRAY_SIZE(ca->open_buckets_partial));
+
+       if (wp->type == BCH_DATA_USER) {
+               spin_lock(&c->freelist_lock);
+               ob->on_partial_list = true;
+               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
+                       ob - c->open_buckets;
+               spin_unlock(&c->freelist_lock);
+
+               closure_wake_up(&c->open_buckets_wait);
+               closure_wake_up(&c->freelist_wait);
+       } else {
+               bch2_open_bucket_put(c, ob);
+       }
+}
+
+static void verify_not_stale(struct bch_fs *c, const struct open_buckets *obs)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+       struct open_bucket *ob;
+       unsigned i;
+
+       open_bucket_for_each(c, obs, ob, i) {
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
+
+               BUG_ON(ptr_stale(ca, &ob->ptr));
+       }
+#endif
+}
+
 /* _only_ for allocating the journal on a new device: */
 long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 {
@@ -164,10 +200,10 @@ static inline unsigned open_buckets_reserved(enum alloc_reserve reserve)
  *
  * Returns index of bucket on success, 0 on failure
  * */
-int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
-                     enum alloc_reserve reserve,
-                     bool may_alloc_partial,
-                     struct closure *cl)
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
+                                     enum alloc_reserve reserve,
+                                     bool may_alloc_partial,
+                                     struct closure *cl)
 {
        struct bucket_array *buckets;
        struct open_bucket *ob;
@@ -177,10 +213,11 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
 
        if (may_alloc_partial &&
            ca->open_buckets_partial_nr) {
-               int ret = ca->open_buckets_partial[--ca->open_buckets_partial_nr];
-               c->open_buckets[ret].on_partial_list = false;
+               ob = c->open_buckets +
+                       ca->open_buckets_partial[--ca->open_buckets_partial_nr];
+               ob->on_partial_list = false;
                spin_unlock(&c->freelist_lock);
-               return ret;
+               return ob;
        }
 
        if (unlikely(c->open_buckets_nr_free <= open_buckets_reserved(reserve))) {
@@ -188,7 +225,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
                        closure_wait(&c->open_buckets_wait, cl);
                spin_unlock(&c->freelist_lock);
                trace_open_bucket_alloc_fail(ca, reserve);
-               return OPEN_BUCKETS_EMPTY;
+               return ERR_PTR(-OPEN_BUCKETS_EMPTY);
        }
 
        if (likely(fifo_pop(&ca->free[RESERVE_NONE], bucket)))
@@ -219,7 +256,7 @@ int bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
        spin_unlock(&c->freelist_lock);
 
        trace_bucket_alloc_fail(ca, reserve);
-       return FREELIST_EMPTY;
+       return ERR_PTR(-FREELIST_EMPTY);
 out:
        verify_not_on_freelist(c, ca, bucket);
 
@@ -245,7 +282,7 @@ out:
        bch2_wake_allocator(ca);
 
        trace_bucket_alloc(ca, reserve);
-       return ob - c->open_buckets;
+       return ob;
 }
 
 static int __dev_alloc_cmp(struct write_point *wp,
@@ -292,155 +329,114 @@ void bch2_wp_rescale(struct bch_fs *c, struct bch_dev *ca,
                *v = *v < scale ? 0 : *v - scale;
 }
 
-static enum bucket_alloc_ret bch2_bucket_alloc_set(struct bch_fs *c,
-                                       struct write_point *wp,
-                                       unsigned nr_replicas,
-                                       enum alloc_reserve reserve,
-                                       struct bch_devs_mask *devs,
-                                       struct closure *cl)
+static int bch2_bucket_alloc_set(struct bch_fs *c,
+                                struct open_buckets *ptrs,
+                                struct write_point *wp,
+                                struct bch_devs_mask *devs_may_alloc,
+                                unsigned nr_replicas,
+                                unsigned *nr_effective,
+                                bool *have_cache,
+                                enum alloc_reserve reserve,
+                                struct closure *cl)
 {
-       enum bucket_alloc_ret ret = NO_DEVICES;
-       struct dev_alloc_list devs_sorted;
+       struct dev_alloc_list devs_sorted =
+               bch2_wp_alloc_list(c, wp, devs_may_alloc);
        struct bch_dev *ca;
-       unsigned i, nr_ptrs_effective = 0;
-       bool have_cache_dev = false;
-
-       BUG_ON(nr_replicas > ARRAY_SIZE(wp->ptrs));
-
-       for (i = wp->first_ptr; i < wp->nr_ptrs; i++) {
-               ca = bch_dev_bkey_exists(c, wp->ptrs[i]->ptr.dev);
-
-               nr_ptrs_effective += ca->mi.durability;
-               have_cache_dev |= !ca->mi.durability;
-       }
-
-       if (nr_ptrs_effective >= nr_replicas)
-               return ALLOC_SUCCESS;
+       bool alloc_failure = false;
+       unsigned i;
 
-       devs_sorted = bch2_wp_alloc_list(c, wp, devs);
+       BUG_ON(*nr_effective >= nr_replicas);
 
        for (i = 0; i < devs_sorted.nr; i++) {
-               int ob;
+               struct open_bucket *ob;
 
                ca = rcu_dereference(c->devs[devs_sorted.devs[i]]);
                if (!ca)
                        continue;
 
                if (!ca->mi.durability &&
-                   (have_cache_dev ||
+                   (*have_cache ||
                     wp->type != BCH_DATA_USER))
                        continue;
 
                ob = bch2_bucket_alloc(c, ca, reserve,
                                       wp->type == BCH_DATA_USER, cl);
-               if (ob < 0) {
-                       ret = ob;
+               if (IS_ERR(ob)) {
+                       enum bucket_alloc_ret ret = -PTR_ERR(ob);
+
+                       WARN_ON(reserve == RESERVE_MOVINGGC &&
+                               ret != OPEN_BUCKETS_EMPTY);
+
+                       if (cl)
+                               return -EAGAIN;
                        if (ret == OPEN_BUCKETS_EMPTY)
-                               break;
+                               return -ENOSPC;
+                       alloc_failure = true;
                        continue;
                }
 
-               BUG_ON(ob <= 0 || ob > U8_MAX);
-               BUG_ON(wp->nr_ptrs >= ARRAY_SIZE(wp->ptrs));
+               __clear_bit(ca->dev_idx, devs_may_alloc->d);
+               *nr_effective   += ca->mi.durability;
+               *have_cache     |= !ca->mi.durability;
 
-               wp->ptrs[wp->nr_ptrs++] = c->open_buckets + ob;
+               ob_push(c, ptrs, ob);
 
                bch2_wp_rescale(c, ca, wp);
 
-               nr_ptrs_effective += ca->mi.durability;
-               have_cache_dev |= !ca->mi.durability;
-
-               __clear_bit(ca->dev_idx, devs->d);
-
-               if (nr_ptrs_effective >= nr_replicas) {
-                       ret = ALLOC_SUCCESS;
-                       break;
-               }
+               if (*nr_effective >= nr_replicas)
+                       return 0;
        }
 
-       EBUG_ON(reserve == RESERVE_MOVINGGC &&
-               ret != ALLOC_SUCCESS &&
-               ret != OPEN_BUCKETS_EMPTY);
-
-       switch (ret) {
-       case ALLOC_SUCCESS:
-               return 0;
-       case NO_DEVICES:
-               return -EROFS;
-       case FREELIST_EMPTY:
-       case OPEN_BUCKETS_EMPTY:
-               return cl ? -EAGAIN : -ENOSPC;
-       default:
-               BUG();
-       }
+       return alloc_failure ? -ENOSPC : -EROFS;
 }
 
 /* Sector allocator */
 
-static void bch2_writepoint_drop_ptr(struct bch_fs *c,
-                                    struct write_point *wp,
-                                    unsigned i)
+static int get_buckets_from_writepoint(struct bch_fs *c,
+                                      struct open_buckets *ptrs,
+                                      struct write_point *wp,
+                                      struct bch_devs_mask *devs_may_alloc,
+                                      unsigned nr_replicas,
+                                      unsigned *nr_effective,
+                                      bool *have_cache)
 {
-       struct open_bucket *ob = wp->ptrs[i];
-       struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-       BUG_ON(ca->open_buckets_partial_nr >=
-              ARRAY_SIZE(ca->open_buckets_partial));
-
-       if (wp->type == BCH_DATA_USER) {
-               spin_lock(&c->freelist_lock);
-               ob->on_partial_list = true;
-               ca->open_buckets_partial[ca->open_buckets_partial_nr++] =
-                       ob - c->open_buckets;
-               spin_unlock(&c->freelist_lock);
-
-               closure_wake_up(&c->open_buckets_wait);
-               closure_wake_up(&c->freelist_wait);
-       } else {
-               bch2_open_bucket_put(c, ob);
-       }
-
-       array_remove_item(wp->ptrs, wp->nr_ptrs, i);
-
-       if (i < wp->first_ptr)
-               wp->first_ptr--;
-}
-
-void bch2_writepoint_drop_ptrs(struct bch_fs *c,
-                              struct write_point *wp,
-                              u16 target, bool in_target)
-{
-       int i;
-
-       for (i = wp->first_ptr - 1; i >= 0; --i)
-               if (bch2_dev_in_target(c, wp->ptrs[i]->ptr.dev,
-                                      target) == in_target)
-                       bch2_writepoint_drop_ptr(c, wp, i);
-}
-
-static void verify_not_stale(struct bch_fs *c, const struct write_point *wp)
-{
-#ifdef CONFIG_BCACHEFS_DEBUG
+       struct open_buckets ptrs_skip = { .nr = 0 };
        struct open_bucket *ob;
        unsigned i;
 
-       writepoint_for_each_ptr_all(wp, ob, i) {
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
 
-               BUG_ON(ptr_stale(ca, &ob->ptr));
+               if (*nr_effective < nr_replicas &&
+                   test_bit(ob->ptr.dev, devs_may_alloc->d) &&
+                   (ca->mi.durability ||
+                    (wp->type == BCH_DATA_USER && !*have_cache))) {
+                       __clear_bit(ob->ptr.dev, devs_may_alloc->d);
+                       *nr_effective   += ca->mi.durability;
+                       *have_cache     |= !ca->mi.durability;
+
+                       ob_push(c, ptrs, ob);
+               } else {
+                       ob_push(c, &ptrs_skip, ob);
+               }
        }
-#endif
+       wp->ptrs = ptrs_skip;
+
+       return *nr_effective < nr_replicas ? -ENOSPC : 0;
 }
 
 static int open_bucket_add_buckets(struct bch_fs *c,
-                                  u16 target,
+                                  struct open_buckets *ptrs,
                                   struct write_point *wp,
                                   struct bch_devs_list *devs_have,
+                                  u16 target,
                                   unsigned nr_replicas,
+                                  unsigned *nr_effective,
+                                  bool *have_cache,
                                   enum alloc_reserve reserve,
                                   struct closure *cl)
 {
-       struct bch_devs_mask devs = c->rw_devs[wp->type];
+       struct bch_devs_mask devs;
        const struct bch_devs_mask *t;
        struct open_bucket *ob;
        unsigned i;
@@ -449,19 +445,38 @@ static int open_bucket_add_buckets(struct bch_fs *c,
        percpu_down_read(&c->usage_lock);
        rcu_read_lock();
 
+       devs = c->rw_devs[wp->type];
+
        /* Don't allocate from devices we already have pointers to: */
        for (i = 0; i < devs_have->nr; i++)
                __clear_bit(devs_have->devs[i], devs.d);
 
-       writepoint_for_each_ptr_all(wp, ob, i)
+       open_bucket_for_each(c, ptrs, ob, i)
                __clear_bit(ob->ptr.dev, devs.d);
 
        t = bch2_target_to_mask(c, target);
        if (t)
                bitmap_and(devs.d, devs.d, t->d, BCH_SB_MEMBERS_MAX);
 
-       ret = bch2_bucket_alloc_set(c, wp, nr_replicas, reserve, &devs, cl);
+       ret = get_buckets_from_writepoint(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache);
+       if (!ret)
+               goto out;
 
+       /*
+        * Try nonblocking first, so that if one device is full we'll try from
+        * other devices:
+        */
+       ret = bch2_bucket_alloc_set(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache,
+                               reserve, NULL);
+       if (!ret || ret == -EROFS || !cl)
+               goto out;
+
+       ret = bch2_bucket_alloc_set(c, ptrs, wp, &devs,
+                               nr_replicas, nr_effective, have_cache,
+                               reserve, cl);
+out:
        rcu_read_unlock();
        percpu_up_read(&c->usage_lock);
 
@@ -471,13 +486,18 @@ static int open_bucket_add_buckets(struct bch_fs *c,
 void bch2_writepoint_stop(struct bch_fs *c, struct bch_dev *ca,
                          struct write_point *wp)
 {
-       struct bch_devs_mask not_self;
-
-       bitmap_complement(not_self.d, ca->self.d, BCH_SB_MEMBERS_MAX);
+       struct open_buckets ptrs = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
 
        mutex_lock(&wp->lock);
-       wp->first_ptr = wp->nr_ptrs;
-       bch2_writepoint_drop_ptrs(c, wp, dev_to_target(ca->dev_idx), true);
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               if (ob->ptr.dev == ca->dev_idx)
+                       open_bucket_free_unused(c, wp, ob);
+               else
+                       ob_push(c, &ptrs, ob);
+
+       wp->ptrs = ptrs;
        mutex_unlock(&wp->lock);
 }
 
@@ -558,134 +578,64 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
 {
        struct write_point *wp;
        struct open_bucket *ob;
-       struct bch_dev *ca;
-       unsigned nr_ptrs_have, nr_ptrs_effective;
-       int ret, i, cache_idx = -1;
+       unsigned nr_effective = 0;
+       struct open_buckets ptrs = { .nr = 0 };
+       bool have_cache = false;
+       int ret = 0, i;
 
        BUG_ON(!nr_replicas || !nr_replicas_required);
 
        wp = writepoint_find(c, write_point.v);
 
-       wp->first_ptr = 0;
-
-       /* does writepoint have ptrs we can't use? */
-       writepoint_for_each_ptr(wp, ob, i)
-               if (bch2_dev_list_has_dev(*devs_have, ob->ptr.dev)) {
-                       swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                       wp->first_ptr++;
-               }
-
-       nr_ptrs_have = wp->first_ptr;
-
-       /* does writepoint have ptrs we don't want to use? */
-       if (target)
-               writepoint_for_each_ptr(wp, ob, i)
-                       if (!bch2_dev_in_target(c, ob->ptr.dev, target)) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                       }
-
-       if (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS) {
-               ret = open_bucket_add_buckets(c, target, wp, devs_have,
-                                             nr_replicas, reserve, cl);
+       if (!target || (flags & BCH_WRITE_ONLY_SPECIFIED_DEVS)) {
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, target,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, cl);
        } else {
-               ret = open_bucket_add_buckets(c, target, wp, devs_have,
-                                             nr_replicas, reserve, NULL);
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, target,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, NULL);
                if (!ret)
                        goto alloc_done;
 
-               wp->first_ptr = nr_ptrs_have;
-
-               ret = open_bucket_add_buckets(c, 0, wp, devs_have,
-                                             nr_replicas, reserve, cl);
+               ret = open_bucket_add_buckets(c, &ptrs, wp, devs_have, 0,
+                                             nr_replicas, &nr_effective,
+                                             &have_cache, reserve, cl);
        }
-
-       if (ret && ret != -EROFS)
-               goto err;
 alloc_done:
-       /* check for more than one cache: */
-       for (i = wp->nr_ptrs - 1; i >= wp->first_ptr; --i) {
-               ca = bch_dev_bkey_exists(c, wp->ptrs[i]->ptr.dev);
-
-               if (ca->mi.durability)
-                       continue;
-
-               /*
-                * if we ended up with more than one cache device, prefer the
-                * one in the target we want:
-                */
-               if (cache_idx >= 0) {
-                       if (!bch2_dev_in_target(c, wp->ptrs[i]->ptr.dev,
-                                               target)) {
-                               bch2_writepoint_drop_ptr(c, wp, i);
-                       } else {
-                               bch2_writepoint_drop_ptr(c, wp, cache_idx);
-                               cache_idx = i;
-                       }
-               } else {
-                       cache_idx = i;
-               }
-       }
-
-       /* we might have more effective replicas than required: */
-       nr_ptrs_effective = 0;
-       writepoint_for_each_ptr(wp, ob, i) {
-               ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-               nr_ptrs_effective += ca->mi.durability;
-       }
+       BUG_ON(!ret && nr_effective < nr_replicas);
 
        if (ret == -EROFS &&
-           nr_ptrs_effective >= nr_replicas_required)
+           nr_effective >= nr_replicas_required)
                ret = 0;
 
        if (ret)
                goto err;
 
-       if (nr_ptrs_effective > nr_replicas) {
-               writepoint_for_each_ptr(wp, ob, i) {
-                       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-                       if (ca->mi.durability &&
-                           ca->mi.durability <= nr_ptrs_effective - nr_replicas &&
-                           !bch2_dev_in_target(c, ob->ptr.dev, target)) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                               nr_ptrs_effective -= ca->mi.durability;
-                       }
-               }
-       }
-
-       if (nr_ptrs_effective > nr_replicas) {
-               writepoint_for_each_ptr(wp, ob, i) {
-                       ca = bch_dev_bkey_exists(c, ob->ptr.dev);
-
-                       if (ca->mi.durability &&
-                           ca->mi.durability <= nr_ptrs_effective - nr_replicas) {
-                               swap(wp->ptrs[i], wp->ptrs[wp->first_ptr]);
-                               wp->first_ptr++;
-                               nr_ptrs_effective -= ca->mi.durability;
-                       }
-               }
-       }
-
-       /* Remove pointers we don't want to use: */
-       if (target)
-               bch2_writepoint_drop_ptrs(c, wp, target, false);
+       /* Free buckets we didn't use: */
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               open_bucket_free_unused(c, wp, ob);
 
-       BUG_ON(wp->first_ptr >= wp->nr_ptrs);
-       BUG_ON(nr_ptrs_effective < nr_replicas_required);
+       wp->ptrs = ptrs;
 
        wp->sectors_free = UINT_MAX;
 
-       writepoint_for_each_ptr(wp, ob, i)
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
                wp->sectors_free = min(wp->sectors_free, ob->sectors_free);
 
        BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
 
-       verify_not_stale(c, wp);
+       verify_not_stale(c, &wp->ptrs);
 
        return wp;
 err:
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               if (ptrs.nr < ARRAY_SIZE(ptrs.v))
+                       ob_push(c, &ptrs, ob);
+               else
+                       open_bucket_free_unused(c, wp, ob);
+       wp->ptrs = ptrs;
+
        mutex_unlock(&wp->lock);
        return ERR_PTR(ret);
 }
@@ -703,7 +653,7 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
        BUG_ON(sectors > wp->sectors_free);
        wp->sectors_free -= sectors;
 
-       writepoint_for_each_ptr(wp, ob, i) {
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev);
                struct bch_extent_ptr tmp = ob->ptr;
 
@@ -726,16 +676,15 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
  */
 void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
 {
-       int i;
+       struct open_buckets ptrs = { .nr = 0 }, keep = { .nr = 0 };
+       struct open_bucket *ob;
+       unsigned i;
 
-       for (i = wp->nr_ptrs - 1; i >= 0; --i) {
-               struct open_bucket *ob = wp->ptrs[i];
-
-               if (!ob->sectors_free) {
-                       array_remove_item(wp->ptrs, wp->nr_ptrs, i);
-                       bch2_open_bucket_put(c, ob);
-               }
-       }
+       open_bucket_for_each(c, &wp->ptrs, ob, i)
+               ob_push(c, !ob->sectors_free ? &ptrs : &keep, ob);
+       wp->ptrs = keep;
 
        mutex_unlock(&wp->lock);
+
+       bch2_open_buckets_put(c, &ptrs);
 }
index 1c738e4ba6c979da179d2aeab56a9148139dda61..609685d08642b66c4e82bdafde9a3bbd7e48f883 100644 (file)
@@ -23,19 +23,23 @@ void bch2_wp_rescale(struct bch_fs *, struct bch_dev *,
 
 long bch2_bucket_alloc_new_fs(struct bch_dev *);
 
-int bch2_bucket_alloc(struct bch_fs *, struct bch_dev *, enum alloc_reserve, bool,
-                     struct closure *);
+struct open_bucket *bch2_bucket_alloc(struct bch_fs *, struct bch_dev *,
+                                     enum alloc_reserve, bool,
+                                     struct closure *);
 
-#define __writepoint_for_each_ptr(_wp, _ob, _i, _start)                        \
-       for ((_i) = (_start);                                           \
-            (_i) < (_wp)->nr_ptrs && ((_ob) = (_wp)->ptrs[_i], true);  \
-            (_i)++)
+static inline void ob_push(struct bch_fs *c, struct open_buckets *obs,
+                          struct open_bucket *ob)
+{
+       BUG_ON(obs->nr >= ARRAY_SIZE(obs->v));
 
-#define writepoint_for_each_ptr_all(_wp, _ob, _i)                      \
-       __writepoint_for_each_ptr(_wp, _ob, _i, 0)
+       obs->v[obs->nr++] = ob - c->open_buckets;
+}
 
-#define writepoint_for_each_ptr(_wp, _ob, _i)                          \
-       __writepoint_for_each_ptr(_wp, _ob, _i, wp->first_ptr)
+#define open_bucket_for_each(_c, _obs, _ob, _i)                                \
+       for ((_i) = 0;                                                  \
+            (_i) < (_obs)->nr &&                                       \
+            ((_ob) = (_c)->open_buckets + (_obs)->v[_i], true);        \
+            (_i)++)
 
 void __bch2_open_bucket_put(struct bch_fs *, struct open_bucket *);
 
@@ -45,26 +49,27 @@ static inline void bch2_open_bucket_put(struct bch_fs *c, struct open_bucket *ob
                __bch2_open_bucket_put(c, ob);
 }
 
-static inline void bch2_open_bucket_put_refs(struct bch_fs *c, u8 *nr, u8 *refs)
+static inline void bch2_open_buckets_put(struct bch_fs *c,
+                                        struct open_buckets *ptrs)
 {
+       struct open_bucket *ob;
        unsigned i;
 
-       for (i = 0; i < *nr; i++)
-               bch2_open_bucket_put(c, c->open_buckets + refs[i]);
-
-       *nr = 0;
+       open_bucket_for_each(c, ptrs, ob, i)
+               bch2_open_bucket_put(c, ob);
+       ptrs->nr = 0;
 }
 
 static inline void bch2_open_bucket_get(struct bch_fs *c,
                                        struct write_point *wp,
-                                       u8 *nr, u8 *refs)
+                                       struct open_buckets *ptrs)
 {
        struct open_bucket *ob;
        unsigned i;
 
-       writepoint_for_each_ptr(wp, ob, i) {
+       open_bucket_for_each(c, &wp->ptrs, ob, i) {
                atomic_inc(&ob->pin);
-               refs[(*nr)++] = ob - c->open_buckets;
+               ob_push(c, ptrs, ob);
        }
 }
 
@@ -84,9 +89,6 @@ void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *);
 void bch2_writepoint_stop(struct bch_fs *, struct bch_dev *,
                          struct write_point *);
 
-void bch2_writepoint_drop_ptrs(struct bch_fs *, struct write_point *,
-                              u16, bool);
-
 static inline struct hlist_head *writepoint_hash(struct bch_fs *c,
                                                 unsigned long write_point)
 {
index 035c50052167a01423919f629a21fa2dc05cbc3b..e0306d68ae9fda49f058adf876edafc1b86b717c 100644 (file)
@@ -58,6 +58,13 @@ struct open_bucket {
        struct bch_extent_ptr   ptr;
 };
 
+#define OPEN_BUCKET_LIST_MAX   15
+
+struct open_buckets {
+       u8                      nr;
+       u8                      v[OPEN_BUCKET_LIST_MAX];
+};
+
 struct write_point {
        struct hlist_node       node;
        struct mutex            lock;
@@ -65,13 +72,10 @@ struct write_point {
        unsigned long           write_point;
        enum bch_data_type      type;
 
-       u8                      nr_ptrs;
-       u8                      first_ptr;
-
        /* calculated based on how many pointers we're actually going to use: */
        unsigned                sectors_free;
 
-       struct open_bucket      *ptrs[BCH_REPLICAS_MAX * 2];
+       struct open_buckets     ptrs;
        u64                     next_alloc[BCH_SB_MEMBERS_MAX];
 };
 
index 7fd75435542b8752cf377a0f06127447c60d8b1e..d07a6b297078625a90a8098ce86bbe9c388a14af 100644 (file)
@@ -6,6 +6,7 @@
 
 #include "bcachefs.h"
 #include "alloc_background.h"
+#include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_locking.h"
 #include "btree_update_interior.h"
@@ -803,7 +804,7 @@ next:
        bch2_btree_iter_node_replace(iter, new_nodes[0]);
 
        for (i = 0; i < nr_new_nodes; i++)
-               bch2_btree_open_bucket_put(c, new_nodes[i]);
+               bch2_open_buckets_put(c, &new_nodes[i]->ob);
 
        /* Free the old nodes and update our sliding window */
        for (i = 0; i < nr_old_nodes; i++) {
index 5053ed5f2762788a900051feee2e1ad159511206..dd9660a9f12bdfbe768c879a0cae044528dcb669 100644 (file)
@@ -54,13 +54,8 @@ struct btree_write {
        struct closure_waitlist         wait;
 };
 
-struct btree_ob_ref {
-       u8                      nr;
-       u8                      refs[BCH_REPLICAS_MAX];
-};
-
 struct btree_alloc {
-       struct btree_ob_ref     ob;
+       struct open_buckets     ob;
        BKEY_PADDED(k);
 };
 
@@ -127,7 +122,7 @@ struct btree {
         */
        unsigned long           will_make_reachable;
 
-       struct btree_ob_ref     ob;
+       struct open_buckets     ob;
 
        /* lru list */
        struct list_head        list;
index 0ca998035bab435e83f0722ce6700094322d5a6b..26721c5a871c336f594c92bb4cd42fcc617452a1 100644 (file)
@@ -247,7 +247,7 @@ static void __btree_node_free(struct bch_fs *c, struct btree *b)
 
 void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
 {
-       struct btree_ob_ref ob = b->ob;
+       struct open_buckets ob = b->ob;
 
        btree_update_drop_new_node(c, b);
 
@@ -259,7 +259,7 @@ void bch2_btree_node_free_never_inserted(struct bch_fs *c, struct btree *b)
        __btree_node_free(c, b);
        six_unlock_write(&b->lock);
 
-       bch2_open_bucket_put_refs(c, &ob.nr, ob.refs);
+       bch2_open_buckets_put(c, &ob);
 }
 
 void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
@@ -300,11 +300,6 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c,
         */
 }
 
-void bch2_btree_open_bucket_put(struct bch_fs *c, struct btree *b)
-{
-       bch2_open_bucket_put_refs(c, &b->ob.nr, b->ob.refs);
-}
-
 static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
                                             struct disk_reservation *res,
                                             struct closure *cl,
@@ -314,7 +309,7 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c,
        struct btree *b;
        BKEY_PADDED(k) tmp;
        struct bkey_i_extent *e;
-       struct btree_ob_ref ob;
+       struct open_buckets ob = { .nr = 0 };
        struct bch_devs_list devs_have = (struct bch_devs_list) { 0 };
        unsigned nr_reserve;
        enum alloc_reserve alloc_reserve;
@@ -356,7 +351,7 @@ retry:
                struct open_bucket *ob;
                unsigned i;
 
-               writepoint_for_each_ptr(wp, ob, i)
+               open_bucket_for_each(c, &wp->ptrs, ob, i)
                        if (ob->sectors_free < c->opts.btree_node_size)
                                ob->sectors_free = 0;
 
@@ -367,8 +362,7 @@ retry:
        e = bkey_extent_init(&tmp.k);
        bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size);
 
-       ob.nr = 0;
-       bch2_open_bucket_get(c, wp, &ob.nr, ob.refs);
+       bch2_open_bucket_get(c, wp, &ob);
        bch2_alloc_sectors_done(c, wp);
 mem_alloc:
        b = bch2_btree_node_mem_alloc(c);
@@ -489,7 +483,7 @@ static void bch2_btree_reserve_put(struct bch_fs *c, struct btree_reserve *reser
                        b->ob.nr = 0;
                        bkey_copy(&a->k, &b->key);
                } else {
-                       bch2_btree_open_bucket_put(c, b);
+                       bch2_open_buckets_put(c, &b->ob);
                }
 
                btree_node_lock_type(c, b, SIX_LOCK_write);
@@ -1432,11 +1426,11 @@ static void btree_split(struct btree_update *as, struct btree *b,
                bch2_btree_set_root(as, n1, iter);
        }
 
-       bch2_btree_open_bucket_put(c, n1);
+       bch2_open_buckets_put(c, &n1->ob);
        if (n2)
-               bch2_btree_open_bucket_put(c, n2);
+               bch2_open_buckets_put(c, &n2->ob);
        if (n3)
-               bch2_btree_open_bucket_put(c, n3);
+               bch2_open_buckets_put(c, &n3->ob);
 
        /*
         * Note - at this point other linked iterators could still have @b read
@@ -1751,7 +1745,7 @@ retry:
 
        bch2_btree_insert_node(as, parent, iter, &as->parent_keys, flags);
 
-       bch2_btree_open_bucket_put(c, n);
+       bch2_open_buckets_put(c, &n->ob);
        bch2_btree_node_free_inmem(c, b, iter);
        bch2_btree_node_free_inmem(c, m, iter);
        bch2_btree_iter_node_replace(iter, n);
@@ -1843,7 +1837,7 @@ static int __btree_node_rewrite(struct bch_fs *c, struct btree_iter *iter,
                bch2_btree_set_root(as, n, iter);
        }
 
-       bch2_btree_open_bucket_put(c, n);
+       bch2_open_buckets_put(c, &n->ob);
 
        bch2_btree_node_free_inmem(c, b, iter);
 
index b24988352b03ac16f0c2c8b4b9240dacdf0adb9a..e5156e9081106e97e57833e6c30240849123c1e2 100644 (file)
@@ -132,7 +132,6 @@ struct btree_update {
 void bch2_btree_node_free_inmem(struct bch_fs *, struct btree *,
                                struct btree_iter *);
 void bch2_btree_node_free_never_inserted(struct bch_fs *, struct btree *);
-void bch2_btree_open_bucket_put(struct bch_fs *, struct btree *);
 
 struct btree *__bch2_btree_node_alloc_replacement(struct btree_update *,
                                                  struct btree *,
index dfd2d3b708c5da2163b007781d8f127317462d1d..d5c17024c8848cae6a93805de56b0e72a8b634f6 100644 (file)
@@ -369,7 +369,7 @@ static void __bch2_write_index(struct bch_write_op *op)
                }
        }
 out:
-       bch2_open_bucket_put_refs(c, &op->open_buckets_nr, op->open_buckets);
+       bch2_open_buckets_put(c, &op->open_buckets);
        return;
 err:
        keys->top = keys->keys;
@@ -816,8 +816,8 @@ static void __bch2_write(struct closure *cl)
 again:
        do {
                /* +1 for possible cache device: */
-               if (op->open_buckets_nr + op->nr_replicas + 1 >
-                   ARRAY_SIZE(op->open_buckets))
+               if (op->open_buckets.nr + op->nr_replicas + 1 >
+                   ARRAY_SIZE(op->open_buckets.v))
                        goto flush_io;
 
                if (bch2_keylist_realloc(&op->insert_keys,
@@ -848,11 +848,7 @@ again:
 
                ret = bch2_write_extent(op, wp);
 
-               BUG_ON(op->open_buckets_nr + wp->nr_ptrs - wp->first_ptr >
-                      ARRAY_SIZE(op->open_buckets));
-               bch2_open_bucket_get(c, wp,
-                                    &op->open_buckets_nr,
-                                    op->open_buckets);
+               bch2_open_bucket_get(c, wp, &op->open_buckets);
                bch2_alloc_sectors_done(c, wp);
 
                if (ret < 0)
index 62f5861005ea6d9c71ab1498eee7fb8ab84ca7e4..1cc040a413eed7bc4d6b9bfeebc799dc16a5c0b2 100644 (file)
@@ -75,7 +75,7 @@ static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
        op->nr_replicas         = 0;
        op->nr_replicas_required = c->opts.data_replicas_required;
        op->alloc_reserve       = RESERVE_NONE;
-       op->open_buckets_nr     = 0;
+       op->open_buckets.nr     = 0;
        op->devs_have.nr        = 0;
        op->target              = 0;
        op->opts                = opts;
index b313128ed857f978410e36def56706b464d13ac6..48273bb68c94b8594bb52998ae944b3a44534fe4 100644 (file)
@@ -106,7 +106,6 @@ struct bch_write_op {
        unsigned                nr_replicas_required:4;
        unsigned                alloc_reserve:4;
 
-       u8                      open_buckets_nr;
        struct bch_devs_list    devs_have;
        u16                     target;
        u16                     nonce;
@@ -123,7 +122,7 @@ struct bch_write_op {
 
        struct disk_reservation res;
 
-       u8                      open_buckets[16];
+       struct open_buckets     open_buckets;
 
        /*
         * If caller wants to flush but hasn't passed us a journal_seq ptr, we
index 97fbc2698dc01ca808660e5f195056710a027b55..7499e15a29821306583ee7b5ecf35830807d3c48 100644 (file)
@@ -717,13 +717,13 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                                goto err;
                        }
                } else {
-                       int ob_idx = bch2_bucket_alloc(c, ca, RESERVE_ALLOC, false, cl);
-                       if (ob_idx < 0) {
+                       ob = bch2_bucket_alloc(c, ca, RESERVE_ALLOC,
+                                              false, cl);
+                       if (IS_ERR(ob)) {
                                ret = cl ? -EAGAIN : -ENOSPC;
                                goto err;
                        }
 
-                       ob = c->open_buckets + ob_idx;
                        bucket = sector_to_bucket(ca, ob->ptr.offset);
                }