#include <linux/parser.h>
#include <linux/sched/signal.h>
#include <linux/blk-cgroup.h>
+#include <asm/local.h>
+#include <asm/local64.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-wbt.h"
};
+/*
+ * Met/missed completion counters for one I/O direction; struct
+ * ioc_pcpu_stat embeds two of these and they are indexed by rw.
+ *
+ * nr_met and nr_missed are bumped with local_inc() in ioc_rqos_done() and
+ * sampled with local_read() in ioc_lat_stat(), which subtracts last_met /
+ * last_missed from the sampled values to obtain the delta it accumulates.
+ */
struct ioc_missed {
- u32 nr_met;
- u32 nr_missed;
+ local_t nr_met;
+ local_t nr_missed;
u32 last_met;
u32 last_missed;
};
+/*
+ * Per-cpu statistics reached through ioc->pcpu_stat.
+ *
+ * missed[] holds the met/missed counters, indexed by rw.  rq_wait_ns is
+ * accumulated with local64_add() in ioc_rqos_done() from
+ * rq->start_time_ns - rq->alloc_time_ns and sampled with local64_read() in
+ * ioc_lat_stat(); last_rq_wait_ns stores the value seen by that sample so
+ * the next pass can compute the difference.
+ */
struct ioc_pcpu_stat {
struct ioc_missed missed[2];
- u64 rq_wait_ns;
+ local64_t rq_wait_ns;
u64 last_rq_wait_ns;
};
atomic64_t active_period;
struct list_head active_list;
- /* see __propagate_active_weight() and current_hweight() for details */
+ /* see __propagate_weights() and current_hweight() for details */
u64 child_active_sum;
u64 child_inuse_sum;
int hweight_gen;
* Update @iocg's `active` and `inuse` to @active and @inuse, update level
* weight sums and propagate upwards accordingly.
*/
-static void __propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
{
struct ioc *ioc = iocg->ioc;
int lvl;
lockdep_assert_held(&ioc->lock);
- inuse = min(active, inuse);
+ inuse = clamp_t(u32, inuse, 1, active);
+
+ if (active == iocg->active && inuse == iocg->inuse)
+ return;
for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
struct ioc_gq *parent = iocg->ancestors[lvl];
ioc->weights_updated = true;
}
-static void commit_active_weights(struct ioc *ioc)
+static void commit_weights(struct ioc *ioc)
{
lockdep_assert_held(&ioc->lock);
}
}
-static void propagate_active_weight(struct ioc_gq *iocg, u32 active, u32 inuse)
+/*
+ * Apply @active and @inuse to @iocg with __propagate_weights() and commit
+ * the result right away with commit_weights().  Both callees assert that
+ * ioc->lock is held, so the caller must hold it.
+ */
+static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
{
- __propagate_active_weight(iocg, active, inuse);
- commit_active_weights(iocg->ioc);
+ __propagate_weights(iocg, active, inuse);
+ commit_weights(iocg->ioc);
}
static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep)
goto out;
/*
- * Paired with wmb in commit_active_weights(). If we saw the
- * updated hweight_gen, all the weight updates from
- * __propagate_active_weight() are visible too.
+ * Paired with wmb in commit_weights(). If we saw the updated
+ * hweight_gen, all the weight updates from __propagate_weights() are
+ * visible too.
*
* We can race with weight updates during calculation and get it
* wrong. However, hweight_gen would have changed and a future
weight = iocg->cfg_weight ?: iocc->dfl_weight;
if (weight != iocg->weight && iocg->active)
- propagate_active_weight(iocg, weight,
+ propagate_weights(iocg, weight,
DIV64_U64_ROUND_UP(iocg->inuse * weight, iocg->weight));
iocg->weight = weight;
}
*/
iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
list_add(&iocg->active_list, &ioc->active_iocgs);
- propagate_active_weight(iocg, iocg->weight,
- iocg->last_inuse ?: iocg->weight);
+ propagate_weights(iocg, iocg->weight,
+ iocg->last_inuse ?: iocg->weight);
TRACE_IOCG_PATH(iocg_activate, iocg, now,
last_period, cur_period, vtime);
return false;
}
+/*
+ * Decide, from @iocg's debt-adjusted vtime, whether its blkg should be
+ * throttled through the blkcg delay mechanism, and make sure delay_timer
+ * is queued so that iocg_delay_timer_fn() re-evaluates the decision later.
+ *
+ * Must be called with iocg->waitq.lock held.
+ *
+ * Returns %true if a delay was set on the blkg, %false if the delay was
+ * cleared or left untouched.
+ */
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
+{
+ struct ioc *ioc = iocg->ioc;
+ struct blkcg_gq *blkg = iocg_to_blkg(iocg);
+ u64 vtime = atomic64_read(&iocg->vtime);
+ /* ioc->margin_us scaled by the current vrate and by NSEC_PER_USEC */
+ u64 vmargin = ioc->margin_us * now->vrate;
+ u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
+ u64 delta_ns, expires, oexpires;
+ u32 hw_inuse;
+
+ lockdep_assert_held(&iocg->waitq.lock);
+
+ /* debt-adjust vtime */
+ current_hweight(iocg, NULL, &hw_inuse);
+ vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
+
+ /*
+ * Clear or maintain depending on the overage. Non-zero vdebt is what
+ * guarantees that @iocg is online and future iocg_kick_delay() will
+ * clear use_delay. Don't leave it on when there's no vdebt.
+ */
+ if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
+ blkcg_clear_delay(blkg);
+ return false;
+ }
+ /* not delaying yet and still within vmargin of vnow, leave it alone */
+ if (!atomic_read(&blkg->use_delay) &&
+ time_before_eq64(vtime, now->vnow + vmargin))
+ return false;
+
+ /* use delay */
+ /* overage divided by vrate (rounded up), times NSEC_PER_USEC */
+ delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
+ now->vrate) * NSEC_PER_USEC;
+ blkcg_set_delay(blkg, delta_ns);
+ expires = now->now_ns + delta_ns;
+
+ /* if already active and close enough, don't bother */
+ oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
+ if (hrtimer_is_queued(&iocg->delay_timer) &&
+ abs(oexpires - expires) <= margin_ns / 4)
+ return true;
+
+ /* (re)arm for the new expiry, passing margin_ns / 4 as the range */
+ hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
+ margin_ns / 4, HRTIMER_MODE_ABS);
+ return true;
+}
+
+/*
+ * hrtimer callback for iocg->delay_timer.  Takes iocg->waitq.lock with
+ * interrupts saved, fills a fresh struct ioc_now via ioc_now() and re-runs
+ * iocg_kick_delay().  Always returns HRTIMER_NORESTART; any re-arming is
+ * done by iocg_kick_delay() itself.
+ */
+static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
+{
+ struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
+ struct ioc_now now;
+ unsigned long flags;
+
+ spin_lock_irqsave(&iocg->waitq.lock, flags);
+ ioc_now(iocg->ioc, &now);
+ iocg_kick_delay(iocg, &now);
+ spin_unlock_irqrestore(&iocg->waitq.lock, flags);
+
+ return HRTIMER_NORESTART;
+}
+
static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
int flags, void *key)
{
return HRTIMER_NORESTART;
}
-static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
-{
- struct ioc *ioc = iocg->ioc;
- struct blkcg_gq *blkg = iocg_to_blkg(iocg);
- u64 vtime = atomic64_read(&iocg->vtime);
- u64 vmargin = ioc->margin_us * now->vrate;
- u64 margin_ns = ioc->margin_us * NSEC_PER_USEC;
- u64 delta_ns, expires, oexpires;
- u32 hw_inuse;
-
- lockdep_assert_held(&iocg->waitq.lock);
-
- /* debt-adjust vtime */
- current_hweight(iocg, NULL, &hw_inuse);
- vtime += abs_cost_to_cost(iocg->abs_vdebt, hw_inuse);
-
- /*
- * Clear or maintain depending on the overage. Non-zero vdebt is what
- * guarantees that @iocg is online and future iocg_kick_delay() will
- * clear use_delay. Don't leave it on when there's no vdebt.
- */
- if (!iocg->abs_vdebt || time_before_eq64(vtime, now->vnow)) {
- blkcg_clear_delay(blkg);
- return false;
- }
- if (!atomic_read(&blkg->use_delay) &&
- time_before_eq64(vtime, now->vnow + vmargin))
- return false;
-
- /* use delay */
- delta_ns = DIV64_U64_ROUND_UP(vtime - now->vnow,
- now->vrate) * NSEC_PER_USEC;
- blkcg_set_delay(blkg, delta_ns);
- expires = now->now_ns + delta_ns;
-
- /* if already active and close enough, don't bother */
- oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
- if (hrtimer_is_queued(&iocg->delay_timer) &&
- abs(oexpires - expires) <= margin_ns / 4)
- return true;
-
- hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
- margin_ns / 4, HRTIMER_MODE_ABS);
- return true;
-}
-
-static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
-{
- struct ioc_gq *iocg = container_of(timer, struct ioc_gq, delay_timer);
- struct ioc_now now;
- unsigned long flags;
-
- spin_lock_irqsave(&iocg->waitq.lock, flags);
- ioc_now(iocg->ioc, &now);
- iocg_kick_delay(iocg, &now);
- spin_unlock_irqrestore(&iocg->waitq.lock, flags);
-
- return HRTIMER_NORESTART;
-}
-
static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p)
{
u32 nr_met[2] = { };
u64 this_rq_wait_ns;
for (rw = READ; rw <= WRITE; rw++) {
- u32 this_met = READ_ONCE(stat->missed[rw].nr_met);
- u32 this_missed = READ_ONCE(stat->missed[rw].nr_missed);
+ u32 this_met = local_read(&stat->missed[rw].nr_met);
+ u32 this_missed = local_read(&stat->missed[rw].nr_missed);
nr_met[rw] += this_met - stat->missed[rw].last_met;
nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
stat->missed[rw].last_missed = this_missed;
}
- this_rq_wait_ns = READ_ONCE(stat->rq_wait_ns);
+ this_rq_wait_ns = local64_read(&stat->rq_wait_ns);
rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
stat->last_rq_wait_ns = this_rq_wait_ns;
}
} else if (iocg_is_idle(iocg)) {
/* no waiter and idle, deactivate */
iocg->last_inuse = iocg->inuse;
- __propagate_active_weight(iocg, 0, 0);
+ __propagate_weights(iocg, 0, 0);
list_del_init(&iocg->active_list);
}
spin_unlock(&iocg->waitq.lock);
}
- commit_active_weights(ioc);
+ commit_weights(ioc);
/* calc usages and see whether some weights need to be moved around */
list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
TRACE_IOCG_PATH(inuse_takeback, iocg, &now,
iocg->inuse, new_inuse,
hw_inuse, new_hwi);
- __propagate_active_weight(iocg, iocg->weight,
- new_inuse);
+ __propagate_weights(iocg, iocg->weight,
+ new_inuse);
}
} else {
/* genuninely out of vtime */
TRACE_IOCG_PATH(inuse_giveaway, iocg, &now,
iocg->inuse, new_inuse,
hw_inuse, new_hwi);
- __propagate_active_weight(iocg, iocg->weight, new_inuse);
+ __propagate_weights(iocg, iocg->weight, new_inuse);
}
}
skip_surplus_transfers:
- commit_active_weights(ioc);
+ commit_weights(ioc);
/*
* If q is getting clogged or we're missing too much, we're issuing
TRACE_IOCG_PATH(inuse_reset, iocg, &now,
iocg->inuse, iocg->weight, hw_inuse, hw_active);
spin_lock_irq(&ioc->lock);
- propagate_active_weight(iocg, iocg->weight, iocg->weight);
+ propagate_weights(iocg, iocg->weight, iocg->weight);
spin_unlock_irq(&ioc->lock);
current_hweight(iocg, &hw_active, &hw_inuse);
}
static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
{
struct ioc *ioc = rqos_to_ioc(rqos);
+ struct ioc_pcpu_stat *ccs;
u64 on_q_ns, rq_wait_ns, size_nsec;
int pidx, rw;
rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);
+ ccs = get_cpu_ptr(ioc->pcpu_stat);
+
if (on_q_ns <= size_nsec ||
on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
- this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_met);
+ local_inc(&ccs->missed[rw].nr_met);
else
- this_cpu_inc(ioc->pcpu_stat->missed[rw].nr_missed);
+ local_inc(&ccs->missed[rw].nr_missed);
+
+ local64_add(rq_wait_ns, &ccs->rq_wait_ns);
- this_cpu_add(ioc->pcpu_stat->rq_wait_ns, rq_wait_ns);
+ put_cpu_ptr(ccs);
}
static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
{
struct ioc *ioc;
struct rq_qos *rqos;
- int ret;
+ int i, cpu, ret;
ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
if (!ioc)
return -ENOMEM;
}
+ for_each_possible_cpu(cpu) {
+ struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu);
+
+ for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) {
+ local_set(&ccs->missed[i].nr_met, 0);
+ local_set(&ccs->missed[i].nr_missed, 0);
+ }
+ local64_set(&ccs->rq_wait_ns, 0);
+ }
+
rqos = &ioc->rqos;
rqos->id = RQ_QOS_COST;
rqos->ops = &ioc_rqos_ops;
{
struct ioc_gq *iocg = pd_to_iocg(pd);
struct ioc *ioc = iocg->ioc;
+ unsigned long flags;
if (ioc) {
- spin_lock(&ioc->lock);
+ spin_lock_irqsave(&ioc->lock, flags);
if (!list_empty(&iocg->active_list)) {
- propagate_active_weight(iocg, 0, 0);
+ propagate_weights(iocg, 0, 0);
list_del_init(&iocg->active_list);
}
- spin_unlock(&ioc->lock);
+ spin_unlock_irqrestore(&ioc->lock, flags);
hrtimer_cancel(&iocg->waitq_timer);
hrtimer_cancel(&iocg->delay_timer);