blk-iocost: move iocg_kick_delay() above iocg_kick_waitq()
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 413e0b5c8e6b08cb9e76af181b267bbed84e8ead..ac22d761a35021a3cf17909958b145bfff0022fa 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
 #include <linux/parser.h>
 #include <linux/sched/signal.h>
 #include <linux/blk-cgroup.h>
+#include <asm/local.h>
+#include <asm/local64.h>
 #include "blk-rq-qos.h"
 #include "blk-stat.h"
 #include "blk-wbt.h"
@@ -373,8 +375,8 @@ struct ioc_params {
 };
 
 struct ioc_missed {
-       u32                             nr_met;
-       u32                             nr_missed;
+       local_t                         nr_met;
+       local_t                         nr_missed;
        u32                             last_met;
        u32                             last_missed;
 };
@@ -382,7 +384,7 @@ struct ioc_missed {
 struct ioc_pcpu_stat {
        struct ioc_missed               missed[2];
 
-       u64                             rq_wait_ns;
+       local64_t                       rq_wait_ns;
        u64                             last_rq_wait_ns;
 };
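
A note on the type change above: ioc_lat_stat() aggregates these counters from remote CPUs, and a plain 64-bit load can tear into two 32-bit loads on 32-bit architectures. local64_t is backed by atomic64_t there (and by a plain long on 64-bit), so remote reads always see whole values; local_t likewise gives cheap, IRQ-safe updates on the owning CPU. A hedged sketch of the read side (foo_read_wait_ns() is hypothetical):

	static u64 foo_read_wait_ns(struct ioc_pcpu_stat __percpu *pcpu_stat,
				    int cpu)
	{
		struct ioc_pcpu_stat *stat = per_cpu_ptr(pcpu_stat, cpu);

		/* untorn even from a remote CPU, unlike READ_ONCE() on u64 */
		return local64_read(&stat->rq_wait_ns);
	}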
 
@@ -477,7 +479,7 @@ struct ioc_gq {
        atomic64_t                      active_period;
        struct list_head                active_list;
 
-       /* see __propagate_active_weight() and current_hweight() for details */
+       /* see __propagate_weights() and current_hweight() for details */
        u64                             child_active_sum;
        u64                             child_inuse_sum;
        int                             hweight_gen;
@@ -888,14 +890,17 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
  * Update @iocg's `active` and `inuse` to @active and @inuse, update level
  * weight sums and propagate upwards accordingly.
  */
-static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
 {
        struct ioc *ioc = iocg->ioc;
        int lvl;
 
        lockdep_assert_held(&ioc->lock);
 
-       inuse = min(active, inuse);
+       inuse = clamp_t(u32, inuse, 1, active);
+
+       if (active == iocg->active && inuse == iocg->inuse)
+               return;
 
        for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
                struct ioc_gq *parent = iocg->ancestors[lvl];
@@ -933,7 +938,7 @@ static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse
        ioc->weights_updated = true;
 }
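
The clamp_t() above keeps the old min(active, inuse) ceiling and adds a floor of 1, so an active iocg can never propagate a zero inuse up the hierarchy; the new early return then skips the ancestor walk entirely when neither value changed. Illustrative values:

	clamp_t(u32, 250, 1, 100);	/* -> 100, same ceiling as min()   */
	clamp_t(u32,  40, 1, 100);	/* ->  40, in range, unchanged     */
	clamp_t(u32,   0, 1, 100);	/* ->   1, new floor; min() gave 0 */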
 
-static void commit_active_weights(struct ioc *ioc)
+static void commit_weights(struct ioc *ioc)
 {
        lockdep_assert_held(&ioc->lock);
 
@@ -945,10 +950,10 @@ static void commit_active_weights(struct ioc *ioc)
        }
 }
 
-static void propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
 {
-       __propagate_active_weight(iocg, active, inuse);
-       commit_active_weights(iocg->ioc);
+       __propagate_weights(iocg, active, inuse);
+       commit_weights(iocg->ioc);
 }
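
Splitting the update (__propagate_weights()) from the publication (commit_weights()) lets callers batch many weight adjustments under ioc->lock and pay the write barrier plus hweight_gen bump only once; propagate_weights() is the single-update convenience wrapper. A sketch of the batched form, mirroring the ioc_timer_fn() usage further down (new_inuse_for() is hypothetical):

	spin_lock_irq(&ioc->lock);
	list_for_each_entry(iocg, &ioc->active_iocgs, active_list)
		__propagate_weights(iocg, iocg->weight, new_inuse_for(iocg));
	commit_weights(ioc);	/* one barrier + generation bump for all */
	spin_unlock_irq(&ioc->lock);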
 
 static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep)
@@ -964,9 +969,9 @@ static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep
                goto out;
 
        /*
-        * Paired with wmb in commit_active_weights().  If we saw the
-        * updated hweight_gen, all the weight updates from
-        * __propagate_active_weight() are visible too.
+        * Paired with wmb in commit_weights(). If we saw the updated
+        * hweight_gen, all the weight updates from __propagate_weights() are
+        * visible too.
         *
         * We can race with weight updates during calculation and get it
         * wrong.  However, hweight_gen would have changed and a future
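
A minimal sketch of the generation-counter publication this comment describes, with hypothetical names standing in for ioc->hweight_gen and the per-level weight sums. The writer orders its data stores before the generation bump; a reader that caches per-generation results recomputes once it sees a newer generation, and a read that raced an in-flight update self-repairs on the next pass because the generation will have moved again:

	static atomic_t gen;

	static void publish(void)		/* commit_weights()-style */
	{
		update_weight_sums();		/* hypothetical data update */
		smp_wmb();			/* data before generation */
		atomic_inc(&gen);
	}

	static int cached_gen = -1;
	static u32 cached_val;

	static u32 read_cached(void)		/* current_hweight()-style */
	{
		int g = atomic_read(&gen);

		if (g != cached_gen) {
			cached_val = recompute();	/* hypothetical; a
							   racing update is
							   caught when gen
							   bumps again */
			cached_gen = g;
		}
		return cached_val;
	}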
@@ -1016,7 +1021,7 @@ static void weight_updated(struct ioc_gq *iocg)
 
        weight = iocg->cfg_weight ?: iocc->dfl_weight;
        if (weight != iocg->weight && iocg->active)
-               propagate_active_weight(iocg, weight,
+               propagate_weights(iocg, weight,
                        DIV64_U64_ROUND_UP(iocg->inuse * weight, iocg->weight));
        iocg->weight = weight;
 }
@@ -1088,8 +1093,8 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
         */
        iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
        list_add(&iocg->active_list, &ioc->active_iocgs);
-       propagate_active_weight(iocg, iocg->weight,
-                               iocg->last_inuse ?: iocg->weight);
+       propagate_weights(iocg, iocg->weight,
+                         iocg->last_inuse ?: iocg->weight);
 
        TRACE_IOCG_PATH(iocg_activate, iocg, now,
                        last_period, cur_period, vtime);
@@ -1110,6 +1115,66 @@ fail_unlock:
        return false;
 }
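
Note the "- 1" in the hweight_gen priming above: setting the cached generation one behind the live counter means the reader's fast path cannot hit, forcing the first current_hweight() call after activation to recompute rather than reuse pre-activation weights. In isolation:

	/* cache can never match the live generation... */
	iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;

	/* ...so this fast path in current_hweight() is guaranteed to miss */
	if (atomic_read(&ioc->hweight_gen) == iocg->hweight_gen)
		goto out;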
 
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
+{
+       struct ioc *ioc = iocg->ioc;
+       struct blkcg_gq *blkg = iocg_to_blkg(iocg);
+       u64 vtime = atomic64_read(&iocg->vtime);
+       u64 vmargin = ioc->margin_us * now->vrate;
+       u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
+       u64 delta_ns, expires, oexpires;
+       u32 hw_inuse;
+
+       lockdep_assert_held(&iocg->waitq.lock);
+
+       /* debt-adjust vtime */
+       current_hweight(iocg, NULL, &hw_inuse);
+       vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
+
+       /*
+        * Clear or maintain depending on the overage. Non-zero vdebt is what
+        * guarantees that @iocg is online and future iocg_kick_delay() will
+        * clear use_delay. Don't leave it on when there's no vdebt.
+        */
+       if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
+               blkcg_clear_delay(blkg);
+               return false;
+       }
+       if (!atomic_read(&blkg->use_delay) &&
+           time_before_eq64(vtime, now->vnow + vmargin))
+               return false;
+
+       /* use delay */
+       delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
+                                     now->vrate) * NSEC_PER_USEC;
+       blkcg_set_delay(blkg, delta_ns);
+       expires = now->now_ns + delta_ns;
+
+       /* if already active and close enough, don't bother */
+       oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
+       if (hrtimer_is_queued(&iocg->delay_timer) &&
+           abs(oexpires - expires) <= margin_ns / 4)
+               return true;
+
+       hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
+                              margin_ns / 4, HRTIMER_MODE_ABS);
+       return true;
+}
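
A worked pass through the overage-to-delay conversion above, with illustrative numbers. vrate is in vtime units per microsecond (which is why vmargin = margin_us * vrate yields vtime units), so with the device at nominal speed (vrate == VTIME_PER_USEC) and the debt-adjusted vtime running 500 * VTIME_PER_USEC ahead of vnow:

	delta_ns = DIV64_U64_ROUND_UP(500 * VTIME_PER_USEC, VTIME_PER_USEC)
			* NSEC_PER_USEC
		 = 500 * NSEC_PER_USEC		/* issuers sleep ~500us */

The timer is then rearmed only when the new expiry is more than margin_ns / 4 away from the already-queued one; reprogramming the hrtimer costs more than that much precision is worth here.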
+
+static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
+{
+       struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
+       struct ioc_now now;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iocg->waitq.lock, flags);
+       ioc_now(iocg->ioc, &now);
+       iocg_kick_delay(iocg, &now);
+       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
+
+       return HRTIMER_NORESTART;
+}
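
The callback recovers its iocg from the embedded hrtimer with container_of(). The general shape of the pattern with hypothetical names, assuming the timer was initialized with CLOCK_MONOTONIC and HRTIMER_MODE_ABS as the iocg timers are:

	struct foo {
		struct hrtimer	timer;
		/* ... */
	};

	static enum hrtimer_restart foo_timer_fn(struct hrtimer *timer)
	{
		struct foo *foo = container_of(timer, struct foo, timer);

		/* do work on foo */
		return HRTIMER_NORESTART;	/* one-shot; callers rearm */
	}

	/* setup, typically at init time: */
	hrtimer_init(&foo->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	foo->timer.function = foo_timer_fn;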
+
 static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
                        int flags, void *key)
 {
@@ -1206,66 +1271,6 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
        return HRTIMER_NORESTART;
 }
 
-static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
-{
-       struct ioc *ioc = iocg->ioc;
-       struct blkcg_gq *blkg = iocg_to_blkg(iocg);
-       u64 vtime = atomic64_read(&iocg->vtime);
-       u64 vmargin = ioc->margin_us * now->vrate;
-       u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
-       u64 delta_ns, expires, oexpires;
-       u32 hw_inuse;
-
-       lockdep_assert_held(&iocg->waitq.lock);
-
-       /* debt-adjust vtime */
-       current_hweight(iocg, NULL, &hw_inuse);
-       vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
-
-       /*
-        * Clear or maintain depending on the overage. Non-zero vdebt is what
-        * guarantees that @iocg is online and future iocg_kick_delay() will
-        * clear use_delay. Don't leave it on when there's no vdebt.
-        */
-       if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
-               blkcg_clear_delay(blkg);
-               return false;
-       }
-       if (!atomic_read(&blkg->use_delay) &&
-           time_before_eq64(vtime, now->vnow + vmargin))
-               return false;
-
-       /* use delay */
-       delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
-                                     now->vrate) * NSEC_PER_USEC;
-       blkcg_set_delay(blkg, delta_ns);
-       expires = now->now_ns + delta_ns;
-
-       /* if already active and close enough, don't bother */
-       oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
-       if (hrtimer_is_queued(&iocg->delay_timer) &&
-           abs(oexpires - expires) <= margin_ns / 4)
-               return true;
-
-       hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
-                              margin_ns / 4, HRTIMER_MODE_ABS);
-       return true;
-}
-
-static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
-{
-       struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
-       struct ioc_now now;
-       unsigned long flags;
-
-       spin_lock_irqsave(&iocg->waitq.lock, flags);
-       ioc_now(iocg->ioc, &now);
-       iocg_kick_delay(iocg, &now);
-       spin_unlock_irqrestore(&iocg->waitq.lock, flags);
-
-       return HRTIMER_NORESTART;
-}
-
 static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p)
 {
        u32 nr_met[2] = { };
@@ -1278,8 +1283,8 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
                u64 this_rq_wait_ns;
 
                for (rw = READ; rw <= WRITE; rw++) {
-                       u32 this_met = READ_ONCE(stat->missed[rw].nr_met);
-                       u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed);
+                       u32 this_met = local_read(&stat->missed[rw].nr_met);
+                       u32 this_missed = local_read(&stat->missed[rw].nr_missed);
 
                        nr_met[rw] += this_met - stat->missed[rw].last_met;
                        nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
@@ -1287,7 +1292,7 @@ static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p
                        stat->missed[rw].last_missed = this_missed;
                }
 
-               this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns);
+               this_rq_wait_ns = local64_read(&stat->rq_wait_ns);
                rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
                stat->last_rq_wait_ns = this_rq_wait_ns;
        }
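
Each monotonic per-cpu counter is paired with a last_* snapshot owned by this single aggregator, so every pass sums only the per-interval deltas; because the arithmetic is unsigned, the deltas stay correct across counter wraparound. A sketch of one counter's collection (foo_collect_met() is hypothetical):

	static u32 foo_collect_met(struct ioc_pcpu_stat __percpu *pcpu_stat,
				   int rw)
	{
		u32 nr_met = 0;
		int cpu;

		for_each_online_cpu(cpu) {
			struct ioc_pcpu_stat *stat =
				per_cpu_ptr(pcpu_stat, cpu);
			u32 cur = local_read(&stat->missed[rw].nr_met);

			nr_met += cur - stat->missed[rw].last_met; /* wrap-safe */
			stat->missed[rw].last_met = cur;
		}
		return nr_met;
	}

Worked wrap case: last_met == 0xfffffffb and cur == 5 gives cur - last_met == 10 in u32 arithmetic, exactly the ten events that occurred.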
@@ -1382,13 +1387,13 @@ static void ioc_timer_fn(struct timer_list *timer)
                } else if (iocg_is_idle(iocg)) {
                        /* no waiter and idle, deactivate */
                        iocg->last_inuse = iocg->inuse;
-                       __propagate_active_weight(iocg, 0, 0);
+                       __propagate_weights(iocg, 0, 0);
                        list_del_init(&iocg->active_list);
                }
 
                spin_unlock(&iocg->waitq.lock);
        }
-       commit_active_weights(ioc);
+       commit_weights(ioc);
 
        /* calc usages and see whether some weights need to be moved around */
        list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
@@ -1481,8 +1486,8 @@ static void ioc_timer_fn(struct timer_list *timer)
                                TRACE_IOCG_PATH(inuse_takeback, iocg, &now,
                                                iocg->inuse, new_inuse,
                                                hw_inuse, new_hwi);
-                               __propagate_active_weight(iocg, iocg->weight,
-                                                         new_inuse);
+                               __propagate_weights(iocg, iocg->weight,
+                                                   new_inuse);
                        }
                } else {
                        /* genuinely out of vtime */
@@ -1522,11 +1527,11 @@ static void ioc_timer_fn(struct timer_list *timer)
                        TRACE_IOCG_PATH(inuse_giveaway, iocg, &now,
                                        iocg->inuse, new_inuse,
                                        hw_inuse, new_hwi);
-                       __propagate_active_weight(iocg, iocg->weight, new_inuse);
+                       __propagate_weights(iocg, iocg->weight, new_inuse);
                }
        }
 skip_surplus_transfers:
-       commit_active_weights(ioc);
+       commit_weights(ioc);
 
        /*
         * If q is getting clogged or we're missing too much, we're issuing
@@ -1751,7 +1756,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
                TRACE_IOCG_PATH(inuse_reset, iocg, &now,
                                iocg->inuse, iocg->weight, hw_inuse, hw_active);
                spin_lock_irq(&ioc->lock);
-               propagate_active_weight(iocg, iocg->weight, iocg->weight);
+               propagate_weights(iocg, iocg->weight, iocg->weight);
                spin_unlock_irq(&ioc->lock);
                current_hweight(iocg, &hw_active, &hw_inuse);
        }
@@ -1908,6 +1913,7 @@ static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
 static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
 {
        struct ioc *ioc = rqos_to_ioc(rqos);
+       struct ioc_pcpu_stat *ccs;
        u64 on_q_ns, rq_wait_ns, size_nsec;
        int pidx, rw;
 
@@ -1931,13 +1937,17 @@ static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
        rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
        size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
 
+       ccs = get_cpu_ptr(ioc->pcpu_stat);
+
        if (on_q_ns <= size_nsec ||
            on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
-               this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
+               local_inc(&ccs->missed[rw].nr_met);
        else
-               this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
+               local_inc(&ccs->missed[rw].nr_missed);
+
+       local64_add(rq_wait_ns, &ccs->rq_wait_ns);
 
-       this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns);
+       put_cpu_ptr(ccs);
 }
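
this_cpu_inc()/this_cpu_add() operate on scalar percpu variables and no longer apply once the fields became local_t/local64_t; instead, get_cpu_ptr() pins the task to the current CPU's copy by disabling preemption for the whole update sequence. A sketch of why the pin matters (foo_account() is hypothetical):

	static void foo_account(struct ioc_pcpu_stat __percpu *pcpu_stat,
				int rw, u64 rq_wait_ns)
	{
		/* disables preemption: no migration between the updates */
		struct ioc_pcpu_stat *ccs = get_cpu_ptr(pcpu_stat);

		local_inc(&ccs->missed[rw].nr_met);
		local64_add(rq_wait_ns, &ccs->rq_wait_ns);

		/*
		 * Without the pin, a migration between the two updates would
		 * leave this task doing local_* ops on another CPU's copy,
		 * which the local API does not make safe.
		 */
		put_cpu_ptr(pcpu_stat);
	}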
 
 static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
@@ -1977,7 +1987,7 @@ static int blk_iocost_init(struct request_queue *q)
 {
        struct ioc *ioc;
        struct rq_qos *rqos;
-       int ret;
+       int i, cpu, ret;
 
        ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
        if (!ioc)
@@ -1989,6 +1999,16 @@ static int blk_iocost_init(struct request_queue *q)
                return -ENOMEM;
        }
 
+       for_each_possible_cpu(cpu) {
+               struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu);
+
+               for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) {
+                       local_set(&ccs->missed[i].nr_met, 0);
+                       local_set(&ccs->missed[i].nr_missed, 0);
+               }
+               local64_set(&ccs->rq_wait_ns, 0);
+       }
+
        rqos = &ioc->rqos;
        rqos->id = RQ_QOS_COST;
        rqos->ops = &ioc_rqos_ops;
@@ -2092,14 +2112,15 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
 {
        struct ioc_gq *iocg = pd_to_iocg(pd);
        struct ioc *ioc = iocg->ioc;
+       unsigned long flags;
 
        if (ioc) {
-               spin_lock(&ioc->lock);
+               spin_lock_irqsave(&ioc->lock, flags);
                if (!list_empty(&iocg->active_list)) {
-                       propagate_active_weight(iocg, 0, 0);
+                       propagate_weights(iocg, 0, 0);
                        list_del_init(&iocg->active_list);
                }
-               spin_unlock(&ioc->lock);
+               spin_unlock_irqrestore(&ioc->lock, flags);
 
                hrtimer_cancel(&iocg->waitq_timer);
                hrtimer_cancel(&iocg->delay_timer);
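
ioc->lock is taken with interrupts disabled elsewhere (see the spin_lock_irq() in ioc_rqos_throttle() above), so every acquisition must disable them too; _irqsave is the right flavor here because ioc_pd_free() can be entered with interrupts in either state. The rule in miniature:

	unsigned long flags;

	spin_lock_irqsave(&ioc->lock, flags);	/* save, then disable, IRQs */
	/* ... */
	spin_unlock_irqrestore(&ioc->lock, flags); /* restore caller's state */

Plain spin_lock_irq()/spin_unlock_irq() would be wrong in such a context: the unlock unconditionally re-enables interrupts, breaking any caller that entered with them disabled.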