#define MAX_KEY_LEN 100
-static DEFINE_SPINLOCK(blkio_list_lock);
-static LIST_HEAD(blkio_list);
+static DEFINE_MUTEX(blkcg_pol_mutex);
-static DEFINE_MUTEX(all_q_mutex);
-static LIST_HEAD(all_q_list);
-
-/* List of groups pending per cpu stats allocation */
-static DEFINE_SPINLOCK(alloc_list_lock);
-static LIST_HEAD(alloc_list);
-
-static void blkio_stat_alloc_fn(struct work_struct *);
-static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);
-
-struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
+struct blkio_cgroup blkio_root_cgroup = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);
-static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];
+static struct blkio_policy_type *blkio_policy[BLKCG_MAX_POLS];
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
-static inline void blkio_update_group_weight(struct blkio_group *blkg,
- int plid, unsigned int weight)
+static bool blkcg_policy_enabled(struct request_queue *q,
+ const struct blkio_policy_type *pol)
{
- struct blkio_policy_type *blkiop;
-
- list_for_each_entry(blkiop, &blkio_list, list) {
- /* If this policy does not own the blkg, do not send updates */
- if (blkiop->plid != plid)
- continue;
- if (blkiop->ops.blkio_update_group_weight_fn)
- blkiop->ops.blkio_update_group_weight_fn(blkg->q,
- blkg, weight);
- }
+ return pol && test_bit(pol->plid, q->blkcg_pols);
}
-static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid,
- u64 bps, int rw)
+static size_t blkg_pd_size(const struct blkio_policy_type *pol)
{
- struct blkio_policy_type *blkiop;
-
- list_for_each_entry(blkiop, &blkio_list, list) {
-
- /* If this policy does not own the blkg, do not send updates */
- if (blkiop->plid != plid)
- continue;
-
- if (rw == READ && blkiop->ops.blkio_update_group_read_bps_fn)
- blkiop->ops.blkio_update_group_read_bps_fn(blkg->q,
- blkg, bps);
-
- if (rw == WRITE && blkiop->ops.blkio_update_group_write_bps_fn)
- blkiop->ops.blkio_update_group_write_bps_fn(blkg->q,
- blkg, bps);
- }
-}
-
-static inline void blkio_update_group_iops(struct blkio_group *blkg, int plid,
- u64 iops, int rw)
-{
- struct blkio_policy_type *blkiop;
-
- list_for_each_entry(blkiop, &blkio_list, list) {
-
- /* If this policy does not own the blkg, do not send updates */
- if (blkiop->plid != plid)
- continue;
-
- if (rw == READ && blkiop->ops.blkio_update_group_read_iops_fn)
- blkiop->ops.blkio_update_group_read_iops_fn(blkg->q,
- blkg, iops);
-
- if (rw == WRITE && blkiop->ops.blkio_update_group_write_iops_fn)
- blkiop->ops.blkio_update_group_write_iops_fn(blkg->q,
- blkg,iops);
- }
-}
-
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-/* This should be called with the queue_lock held. */
-static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- struct blkio_group *curr_blkg)
-{
- struct blkg_policy_data *pd = blkg->pd[pol->plid];
-
- if (blkio_blkg_waiting(&pd->stats))
- return;
- if (blkg == curr_blkg)
- return;
- pd->stats.start_group_wait_time = sched_clock();
- blkio_mark_blkg_waiting(&pd->stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
-{
- unsigned long long now;
-
- if (!blkio_blkg_waiting(stats))
- return;
-
- now = sched_clock();
- if (time_after64(now, stats->start_group_wait_time))
- blkg_stat_add(&stats->group_wait_time,
- now - stats->start_group_wait_time);
- blkio_clear_blkg_waiting(stats);
-}
-
-/* This should be called with the queue_lock held. */
-static void blkio_end_empty_time(struct blkio_group_stats *stats)
-{
- unsigned long long now;
-
- if (!blkio_blkg_empty(stats))
- return;
-
- now = sched_clock();
- if (time_after64(now, stats->start_empty_time))
- blkg_stat_add(&stats->empty_time,
- now - stats->start_empty_time);
- blkio_clear_blkg_empty(stats);
-}
-
-void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
-
- lockdep_assert_held(blkg->q->queue_lock);
- BUG_ON(blkio_blkg_idling(stats));
-
- stats->start_idle_time = sched_clock();
- blkio_mark_blkg_idling(stats);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
-
-void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- if (blkio_blkg_idling(stats)) {
- unsigned long long now = sched_clock();
-
- if (time_after64(now, stats->start_idle_time))
- blkg_stat_add(&stats->idle_time,
- now - stats->start_idle_time);
- blkio_clear_blkg_idling(stats);
- }
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
-
-void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_stat_add(&stats->avg_queue_size_sum,
- blkg_rwstat_sum(&stats->queued));
- blkg_stat_add(&stats->avg_queue_size_samples, 1);
- blkio_update_group_wait_time(stats);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
-
-void blkiocg_set_start_empty_time(struct blkio_group *blkg,
- struct blkio_policy_type *pol)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- if (blkg_rwstat_sum(&stats->queued))
- return;
-
- /*
- * group is already marked empty. This can happen if cfqq got new
- * request in parent group and moved to this group while being added
- * to service tree. Just ignore the event and move on.
- */
- if (blkio_blkg_empty(stats))
- return;
-
- stats->start_empty_time = sched_clock();
- blkio_mark_blkg_empty(stats);
-}
-EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
-
-void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- unsigned long dequeue)
-{
- struct blkg_policy_data *pd = blkg->pd[pol->plid];
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_stat_add(&pd->stats.dequeue, dequeue);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
-#else
-static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- struct blkio_group *curr_blkg) { }
-static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { }
-#endif
-
-void blkiocg_update_io_add_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- struct blkio_group *curr_blkg, bool direction,
- bool sync)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
- int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_rwstat_add(&stats->queued, rw, 1);
- blkio_end_empty_time(stats);
- blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
-
-void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- bool direction, bool sync)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
- int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_rwstat_add(&stats->queued, rw, -1);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
-
-void blkiocg_update_timeslice_used(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- unsigned long time,
- unsigned long unaccounted_time)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_stat_add(&stats->time, time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_add(&stats->unaccounted_time, unaccounted_time);
-#endif
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
-
-/*
- * should be called under rcu read lock or queue lock to make sure blkg pointer
- * is valid.
- */
-void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- uint64_t bytes, bool direction, bool sync)
-{
- int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
- struct blkg_policy_data *pd = blkg->pd[pol->plid];
- struct blkio_group_stats_cpu *stats_cpu;
- unsigned long flags;
-
- /* If per cpu stats are not allocated yet, don't do any accounting. */
- if (pd->stats_cpu == NULL)
- return;
-
- /*
- * Disabling interrupts to provide mutual exclusion between two
- * writes on same cpu. It probably is not needed for 64bit. Not
- * optimizing that case yet.
- */
- local_irq_save(flags);
-
- stats_cpu = this_cpu_ptr(pd->stats_cpu);
-
- blkg_stat_add(&stats_cpu->sectors, bytes >> 9);
- blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
- blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);
-
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
-
-void blkiocg_update_completion_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- uint64_t start_time,
- uint64_t io_start_time, bool direction,
- bool sync)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
- unsigned long long now = sched_clock();
- int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- if (time_after64(now, io_start_time))
- blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
- if (time_after64(io_start_time, start_time))
- blkg_rwstat_add(&stats->wait_time, rw,
- io_start_time - start_time);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
-
-/* Merged stats are per cpu. */
-void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
- struct blkio_policy_type *pol,
- bool direction, bool sync)
-{
- struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
- int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
-
- lockdep_assert_held(blkg->q->queue_lock);
-
- blkg_rwstat_add(&stats->merged, rw, 1);
-}
-EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
-
-/*
- * Worker for allocating per cpu stat for blk groups. This is scheduled on
- * the system_nrt_wq once there are some groups on the alloc_list waiting
- * for allocation.
- */
-static void blkio_stat_alloc_fn(struct work_struct *work)
-{
- static void *pcpu_stats[BLKIO_NR_POLICIES];
- struct delayed_work *dwork = to_delayed_work(work);
- struct blkio_group *blkg;
- int i;
- bool empty = false;
-
-alloc_stats:
- for (i = 0; i < BLKIO_NR_POLICIES; i++) {
- if (pcpu_stats[i] != NULL)
- continue;
-
- pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);
-
- /* Allocation failed. Try again after some time. */
- if (pcpu_stats[i] == NULL) {
- queue_delayed_work(system_nrt_wq, dwork,
- msecs_to_jiffies(10));
- return;
- }
- }
-
- spin_lock_irq(&blkio_list_lock);
- spin_lock(&alloc_list_lock);
-
- /* cgroup got deleted or queue exited. */
- if (!list_empty(&alloc_list)) {
- blkg = list_first_entry(&alloc_list, struct blkio_group,
- alloc_node);
- for (i = 0; i < BLKIO_NR_POLICIES; i++) {
- struct blkg_policy_data *pd = blkg->pd[i];
-
- if (blkio_policy[i] && pd && !pd->stats_cpu)
- swap(pd->stats_cpu, pcpu_stats[i]);
- }
-
- list_del_init(&blkg->alloc_node);
- }
-
- empty = list_empty(&alloc_list);
-
- spin_unlock(&alloc_list_lock);
- spin_unlock_irq(&blkio_list_lock);
-
- if (!empty)
- goto alloc_stats;
+ return sizeof(struct blkg_policy_data) + pol->pdata_size;
}
/**
if (!blkg)
return;
- for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkio_policy_type *pol = blkio_policy[i];
struct blkg_policy_data *pd = blkg->pd[i];
- if (pd) {
- free_percpu(pd->stats_cpu);
- kfree(pd);
- }
+ if (!pd)
+ continue;
+
+ if (pol && pol->ops.blkio_exit_group_fn)
+ pol->ops.blkio_exit_group_fn(blkg);
+
+ kfree(pd);
}
kfree(blkg);
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
- INIT_LIST_HEAD(&blkg->alloc_node);
blkg->blkcg = blkcg;
blkg->refcnt = 1;
cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
- for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkio_policy_type *pol = blkio_policy[i];
struct blkg_policy_data *pd;
- if (!pol)
+ if (!blkcg_policy_enabled(q, pol))
continue;
/* alloc per-policy data and attach it to blkg */
- pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
- q->node);
+ pd = kzalloc_node(blkg_pd_size(pol), GFP_ATOMIC, q->node);
if (!pd) {
blkg_free(blkg);
return NULL;
}
/* invoke per-policy init */
- for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
struct blkio_policy_type *pol = blkio_policy[i];
- if (pol)
+ if (blkcg_policy_enabled(blkg->q, pol))
pol->ops.blkio_init_group_fn(blkg);
}
return blkg;
}
-struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
- struct request_queue *q,
- bool for_root)
+static struct blkio_group *__blkg_lookup(struct blkio_cgroup *blkcg,
+ struct request_queue *q)
+{
+ struct blkio_group *blkg;
+ struct hlist_node *n;
+
+ hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
+ if (blkg->q == q)
+ return blkg;
+ return NULL;
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair. This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+ struct request_queue *q)
+{
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ if (unlikely(blk_queue_bypass(q)))
+ return NULL;
+ return __blkg_lookup(blkcg, q);
+}
+EXPORT_SYMBOL_GPL(blkg_lookup);
+
+static struct blkio_group *__blkg_lookup_create(struct blkio_cgroup *blkcg,
+ struct request_queue *q)
__releases(q->queue_lock) __acquires(q->queue_lock)
{
struct blkio_group *blkg;
WARN_ON_ONCE(!rcu_read_lock_held());
lockdep_assert_held(q->queue_lock);
- /*
- * This could be the first entry point of blkcg implementation and
- * we shouldn't allow anything to go through for a bypassing queue.
- * The following can be removed if blkg lookup is guaranteed to
- * fail on a bypassing queue.
- */
- if (unlikely(blk_queue_bypass(q)) && !for_root)
- return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
-
- blkg = blkg_lookup(blkcg, q);
+ blkg = __blkg_lookup(blkcg, q);
if (blkg)
return blkg;
hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
list_add(&blkg->q_node, &q->blkg_list);
spin_unlock(&blkcg->lock);
-
- spin_lock(&alloc_list_lock);
- list_add(&blkg->alloc_node, &alloc_list);
- /* Queue per cpu stat allocation from worker thread. */
- queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
- spin_unlock(&alloc_list_lock);
out:
return blkg;
}
-EXPORT_SYMBOL_GPL(blkg_lookup_create);
-/* called under rcu_read_lock(). */
-struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
- struct request_queue *q)
+struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
+ struct request_queue *q)
{
- struct blkio_group *blkg;
- struct hlist_node *n;
-
- hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
- if (blkg->q == q)
- return blkg;
- return NULL;
+ /*
+ * This could be the first entry point of blkcg implementation and
+ * we shouldn't allow anything to go through for a bypassing queue.
+ */
+ if (unlikely(blk_queue_bypass(q)))
+ return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+ return __blkg_lookup_create(blkcg, q);
}
-EXPORT_SYMBOL_GPL(blkg_lookup);
+EXPORT_SYMBOL_GPL(blkg_lookup_create);
static void blkg_destroy(struct blkio_group *blkg)
{
list_del_init(&blkg->q_node);
hlist_del_init_rcu(&blkg->blkcg_node);
- spin_lock(&alloc_list_lock);
- list_del_init(&blkg->alloc_node);
- spin_unlock(&alloc_list_lock);
-
/*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
blkg_put(blkg);
}
-/*
- * XXX: This updates blkg policy data in-place for root blkg, which is
- * necessary across elevator switch and policy registration as root blkgs
- * aren't shot down. This broken and racy implementation is temporary.
- * Eventually, blkg shoot down will be replaced by proper in-place update.
- */
-void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
-{
- struct blkio_policy_type *pol = blkio_policy[plid];
- struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
- struct blkg_policy_data *pd;
-
- if (!blkg)
- return;
-
- kfree(blkg->pd[plid]);
- blkg->pd[plid] = NULL;
-
- if (!pol)
- return;
-
- pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
- WARN_ON_ONCE(!pd);
-
- pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
- WARN_ON_ONCE(!pd->stats_cpu);
-
- blkg->pd[plid] = pd;
- pd->blkg = blkg;
- pol->ops.blkio_init_group_fn(blkg);
-}
-EXPORT_SYMBOL_GPL(update_root_blkg_pd);
-
/**
* blkg_destroy_all - destroy all blkgs associated with a request_queue
* @q: request_queue of interest
- * @destroy_root: whether to destroy root blkg or not
*
- * Destroy blkgs associated with @q. If @destroy_root is %true, all are
- * destroyed; otherwise, root blkg is left alone.
+ * Destroy all blkgs associated with @q.
*/
-void blkg_destroy_all(struct request_queue *q, bool destroy_root)
+static void blkg_destroy_all(struct request_queue *q)
{
struct blkio_group *blkg, *n;
- spin_lock_irq(q->queue_lock);
+ lockdep_assert_held(q->queue_lock);
list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
struct blkio_cgroup *blkcg = blkg->blkcg;
- /* skip root? */
- if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
- continue;
-
spin_lock(&blkcg->lock);
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
}
-
- spin_unlock_irq(q->queue_lock);
}
-EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
}
EXPORT_SYMBOL_GPL(__blkg_release);
-static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
-{
- struct blkg_policy_data *pd = blkg->pd[plid];
- int cpu;
-
- if (pd->stats_cpu == NULL)
- return;
-
- for_each_possible_cpu(cpu) {
- struct blkio_group_stats_cpu *sc =
- per_cpu_ptr(pd->stats_cpu, cpu);
-
- blkg_rwstat_reset(&sc->service_bytes);
- blkg_rwstat_reset(&sc->serviced);
- blkg_stat_reset(&sc->sectors);
- }
-}
-
static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
struct blkio_group *blkg;
struct hlist_node *n;
+ int i;
- spin_lock(&blkio_list_lock);
+ mutex_lock(&blkcg_pol_mutex);
spin_lock_irq(&blkcg->lock);
/*
* anyway. If you get hit by a race, retry.
*/
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
- struct blkio_policy_type *pol;
-
- list_for_each_entry(pol, &blkio_list, list) {
- struct blkg_policy_data *pd = blkg->pd[pol->plid];
- struct blkio_group_stats *stats = &pd->stats;
-
- /* queued stats shouldn't be cleared */
- blkg_rwstat_reset(&stats->merged);
- blkg_rwstat_reset(&stats->service_time);
- blkg_rwstat_reset(&stats->wait_time);
- blkg_stat_reset(&stats->time);
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- blkg_stat_reset(&stats->unaccounted_time);
- blkg_stat_reset(&stats->avg_queue_size_sum);
- blkg_stat_reset(&stats->avg_queue_size_samples);
- blkg_stat_reset(&stats->dequeue);
- blkg_stat_reset(&stats->group_wait_time);
- blkg_stat_reset(&stats->idle_time);
- blkg_stat_reset(&stats->empty_time);
-#endif
- blkio_reset_stats_cpu(blkg, pol->plid);
+ for (i = 0; i < BLKCG_MAX_POLS; i++) {
+ struct blkio_policy_type *pol = blkio_policy[i];
+
+ if (blkcg_policy_enabled(blkg->q, pol) &&
+ pol->ops.blkio_reset_group_stats_fn)
+ pol->ops.blkio_reset_group_stats_fn(blkg);
}
}
spin_unlock_irq(&blkcg->lock);
- spin_unlock(&blkio_list_lock);
+ mutex_unlock(&blkcg_pol_mutex);
return 0;
}
* cftype->read_seq_string method.
*/
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
- u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
- int pol, int data, bool show_total)
+ u64 (*prfill)(struct seq_file *, void *, int),
+ const struct blkio_policy_type *pol, int data,
+ bool show_total)
{
struct blkio_group *blkg;
struct hlist_node *n;
spin_lock_irq(&blkcg->lock);
hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
- if (blkg->pd[pol])
- total += prfill(sf, blkg->pd[pol], data);
+ if (blkcg_policy_enabled(blkg->q, pol))
+ total += prfill(sf, blkg->pd[pol->plid]->pdata, data);
spin_unlock_irq(&blkcg->lock);
if (show_total)
/**
* __blkg_prfill_u64 - prfill helper for a single u64 value
* @sf: seq_file to print to
- * @pd: policy data of interest
+ * @pdata: policy private data of interest
* @v: value to print
*
- * Print @v to @sf for the device assocaited with @pd.
+ * Print @v to @sf for the device assocaited with @pdata.
*/
-u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
+u64 __blkg_prfill_u64(struct seq_file *sf, void *pdata, u64 v)
{
- const char *dname = blkg_dev_name(pd->blkg);
+ const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
if (!dname)
return 0;
/**
* __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
* @sf: seq_file to print to
- * @pd: policy data of interest
+ * @pdata: policy private data of interest
* @rwstat: rwstat to print
*
- * Print @rwstat to @sf for the device assocaited with @pd.
+ * Print @rwstat to @sf for the device assocaited with @pdata.
*/
-u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
+u64 __blkg_prfill_rwstat(struct seq_file *sf, void *pdata,
const struct blkg_rwstat *rwstat)
{
static const char *rwstr[] = {
[BLKG_RWSTAT_SYNC] = "Sync",
[BLKG_RWSTAT_ASYNC] = "Async",
};
- const char *dname = blkg_dev_name(pd->blkg);
+ const char *dname = blkg_dev_name(pdata_to_blkg(pdata));
u64 v;
int i;
return v;
}
-static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
- int off)
-{
- return __blkg_prfill_u64(sf, pd,
- blkg_stat_read((void *)&pd->stats + off));
-}
-
-static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
- int off)
-{
- struct blkg_rwstat rwstat = blkg_rwstat_read((void *)&pd->stats + off);
-
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/* print blkg_stat specified by BLKCG_STAT_PRIV() */
-int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
-
- blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
- BLKCG_STAT_POL(cft->private),
- BLKCG_STAT_OFF(cft->private), false);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkcg_print_stat);
-
-/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
-int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
-
- blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
- BLKCG_STAT_POL(cft->private),
- BLKCG_STAT_OFF(cft->private), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkcg_print_rwstat);
-
-static u64 blkg_prfill_cpu_stat(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- u64 v = 0;
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct blkio_group_stats_cpu *sc =
- per_cpu_ptr(pd->stats_cpu, cpu);
-
- v += blkg_stat_read((void *)sc + off);
- }
-
- return __blkg_prfill_u64(sf, pd, v);
-}
-
-static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- struct blkg_rwstat rwstat = { }, tmp;
- int i, cpu;
-
- for_each_possible_cpu(cpu) {
- struct blkio_group_stats_cpu *sc =
- per_cpu_ptr(pd->stats_cpu, cpu);
-
- tmp = blkg_rwstat_read((void *)sc + off);
- for (i = 0; i < BLKG_RWSTAT_NR; i++)
- rwstat.cnt[i] += tmp.cnt[i];
- }
-
- return __blkg_prfill_rwstat(sf, pd, &rwstat);
-}
-
-/* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */
-int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
-
- blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_stat,
- BLKCG_STAT_POL(cft->private),
- BLKCG_STAT_OFF(cft->private), false);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkcg_print_cpu_stat);
-
-/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */
-int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
-
- blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat,
- BLKCG_STAT_POL(cft->private),
- BLKCG_STAT_OFF(cft->private), true);
- return 0;
-}
-EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat);
-
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-static u64 blkg_prfill_avg_queue_size(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
+/**
+ * blkg_prfill_stat - prfill callback for blkg_stat
+ * @sf: seq_file to print to
+ * @pdata: policy private data of interest
+ * @off: offset to the blkg_stat in @pdata
+ *
+ * prfill callback for printing a blkg_stat.
+ */
+u64 blkg_prfill_stat(struct seq_file *sf, void *pdata, int off)
{
- u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples);
- u64 v = 0;
-
- if (samples) {
- v = blkg_stat_read(&pd->stats.avg_queue_size_sum);
- do_div(v, samples);
- }
- __blkg_prfill_u64(sf, pd, v);
- return 0;
+ return __blkg_prfill_u64(sf, pdata, blkg_stat_read(pdata + off));
}
+EXPORT_SYMBOL_GPL(blkg_prfill_stat);
-/* print avg_queue_size */
-static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
+/**
+ * blkg_prfill_rwstat - prfill callback for blkg_rwstat
+ * @sf: seq_file to print to
+ * @pdata: policy private data of interest
+ * @off: offset to the blkg_rwstat in @pdata
+ *
+ * prfill callback for printing a blkg_rwstat.
+ */
+u64 blkg_prfill_rwstat(struct seq_file *sf, void *pdata, int off)
{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
+ struct blkg_rwstat rwstat = blkg_rwstat_read(pdata + off);
- blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size,
- BLKIO_POLICY_PROP, 0, false);
- return 0;
+ return __blkg_prfill_rwstat(sf, pdata, &rwstat);
}
-#endif /* CONFIG_DEBUG_BLK_CGROUP */
+EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
/**
* blkg_conf_prep - parse and prepare for per-blkg config update
* @blkcg: target block cgroup
+ * @pol: target policy
* @input: input string
* @ctx: blkg_conf_ctx to be filled
*
* Parse per-blkg config update from @input and initialize @ctx with the
* result. @ctx->blkg points to the blkg to be updated and @ctx->v the new
- * value. This function returns with RCU read locked and must be paired
- * with blkg_conf_finish().
+ * value. This function returns with RCU read lock and queue lock held and
+ * must be paired with blkg_conf_finish().
*/
-int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
+int blkg_conf_prep(struct blkio_cgroup *blkcg,
+ const struct blkio_policy_type *pol, const char *input,
struct blkg_conf_ctx *ctx)
- __acquires(rcu)
+ __acquires(rcu) __acquires(disk->queue->queue_lock)
{
struct gendisk *disk;
struct blkio_group *blkg;
return -EINVAL;
rcu_read_lock();
-
spin_lock_irq(disk->queue->queue_lock);
- blkg = blkg_lookup_create(blkcg, disk->queue, false);
- spin_unlock_irq(disk->queue->queue_lock);
+
+ if (blkcg_policy_enabled(disk->queue, pol))
+ blkg = blkg_lookup_create(blkcg, disk->queue);
+ else
+ blkg = ERR_PTR(-EINVAL);
if (IS_ERR(blkg)) {
ret = PTR_ERR(blkg);
rcu_read_unlock();
+ spin_unlock_irq(disk->queue->queue_lock);
put_disk(disk);
/*
* If queue was bypassing, we should retry. Do so after a
* with blkg_conf_prep().
*/
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
- __releases(rcu)
+ __releases(ctx->disk->queue->queue_lock) __releases(rcu)
{
+ spin_unlock_irq(ctx->disk->queue->queue_lock);
rcu_read_unlock();
put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
-/* for propio conf */
-static u64 blkg_prfill_weight_device(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- if (!pd->conf.weight)
- return 0;
- return __blkg_prfill_u64(sf, pd, pd->conf.weight);
-}
-
-static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
- blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0,
- false);
- return 0;
-}
-
-static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight);
- return 0;
-}
-
-static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
- struct blkg_policy_data *pd;
- struct blkg_conf_ctx ctx;
- int ret;
-
- ret = blkg_conf_prep(blkcg, buf, &ctx);
- if (ret)
- return ret;
-
- ret = -EINVAL;
- pd = ctx.blkg->pd[BLKIO_POLICY_PROP];
- if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN &&
- ctx.v <= BLKIO_WEIGHT_MAX))) {
- pd->conf.weight = ctx.v;
- blkio_update_group_weight(ctx.blkg, BLKIO_POLICY_PROP,
- ctx.v ?: blkcg->weight);
- ret = 0;
- }
-
- blkg_conf_finish(&ctx);
- return ret;
-}
-
-static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
- struct blkio_group *blkg;
- struct hlist_node *n;
-
- if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
- return -EINVAL;
-
- spin_lock(&blkio_list_lock);
- spin_lock_irq(&blkcg->lock);
- blkcg->weight = (unsigned int)val;
-
- hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
- struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_PROP];
-
- if (pd && !pd->conf.weight)
- blkio_update_group_weight(blkg, BLKIO_POLICY_PROP,
- blkcg->weight);
- }
-
- spin_unlock_irq(&blkcg->lock);
- spin_unlock(&blkio_list_lock);
- return 0;
-}
-
-/* for blk-throttle conf */
-#ifdef CONFIG_BLK_DEV_THROTTLING
-static u64 blkg_prfill_conf_u64(struct seq_file *sf,
- struct blkg_policy_data *pd, int off)
-{
- u64 v = *(u64 *)((void *)&pd->conf + off);
-
- if (!v)
- return 0;
- return __blkg_prfill_u64(sf, pd, v);
-}
-
-static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft,
- struct seq_file *sf)
-{
- blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
- blkg_prfill_conf_u64, BLKIO_POLICY_THROTL,
- cft->private, false);
- return 0;
-}
-
-static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft,
- const char *buf, int rw,
- void (*update)(struct blkio_group *, int, u64, int))
-{
- struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
- struct blkg_policy_data *pd;
- struct blkg_conf_ctx ctx;
- int ret;
-
- ret = blkg_conf_prep(blkcg, buf, &ctx);
- if (ret)
- return ret;
-
- ret = -EINVAL;
- pd = ctx.blkg->pd[BLKIO_POLICY_THROTL];
- if (pd) {
- *(u64 *)((void *)&pd->conf + cft->private) = ctx.v;
- update(ctx.blkg, BLKIO_POLICY_THROTL, ctx.v ?: -1, rw);
- ret = 0;
- }
-
- blkg_conf_finish(&ctx);
- return ret;
-}
-
-static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
-{
- return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_bps);
-}
-
-static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
-{
- return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_bps);
-}
-
-static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
-{
- return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_iops);
-}
-
-static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft,
- const char *buf)
-{
- return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_iops);
-}
-#endif
-
struct cftype blkio_files[] = {
- {
- .name = "weight_device",
- .read_seq_string = blkcg_print_weight_device,
- .write_string = blkcg_set_weight_device,
- .max_write_len = 256,
- },
- {
- .name = "weight",
- .read_seq_string = blkcg_print_weight,
- .write_u64 = blkcg_set_weight,
- },
- {
- .name = "time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, time)),
- .read_seq_string = blkcg_print_stat,
- },
- {
- .name = "sectors",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats_cpu, sectors)),
- .read_seq_string = blkcg_print_cpu_stat,
- },
- {
- .name = "io_service_bytes",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats_cpu, service_bytes)),
- .read_seq_string = blkcg_print_cpu_rwstat,
- },
- {
- .name = "io_serviced",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats_cpu, serviced)),
- .read_seq_string = blkcg_print_cpu_rwstat,
- },
- {
- .name = "io_service_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, service_time)),
- .read_seq_string = blkcg_print_rwstat,
- },
- {
- .name = "io_wait_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, wait_time)),
- .read_seq_string = blkcg_print_rwstat,
- },
- {
- .name = "io_merged",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, merged)),
- .read_seq_string = blkcg_print_rwstat,
- },
- {
- .name = "io_queued",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, queued)),
- .read_seq_string = blkcg_print_rwstat,
- },
{
.name = "reset_stats",
.write_u64 = blkiocg_reset_stats,
},
-#ifdef CONFIG_BLK_DEV_THROTTLING
- {
- .name = "throttle.read_bps_device",
- .private = offsetof(struct blkio_group_conf, bps[READ]),
- .read_seq_string = blkcg_print_conf_u64,
- .write_string = blkcg_set_conf_bps_r,
- .max_write_len = 256,
- },
-
- {
- .name = "throttle.write_bps_device",
- .private = offsetof(struct blkio_group_conf, bps[WRITE]),
- .read_seq_string = blkcg_print_conf_u64,
- .write_string = blkcg_set_conf_bps_w,
- .max_write_len = 256,
- },
-
- {
- .name = "throttle.read_iops_device",
- .private = offsetof(struct blkio_group_conf, iops[READ]),
- .read_seq_string = blkcg_print_conf_u64,
- .write_string = blkcg_set_conf_iops_r,
- .max_write_len = 256,
- },
-
- {
- .name = "throttle.write_iops_device",
- .private = offsetof(struct blkio_group_conf, iops[WRITE]),
- .read_seq_string = blkcg_print_conf_u64,
- .write_string = blkcg_set_conf_iops_w,
- .max_write_len = 256,
- },
- {
- .name = "throttle.io_service_bytes",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL,
- offsetof(struct blkio_group_stats_cpu, service_bytes)),
- .read_seq_string = blkcg_print_cpu_rwstat,
- },
- {
- .name = "throttle.io_serviced",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL,
- offsetof(struct blkio_group_stats_cpu, serviced)),
- .read_seq_string = blkcg_print_cpu_rwstat,
- },
-#endif /* CONFIG_BLK_DEV_THROTTLING */
-
-#ifdef CONFIG_DEBUG_BLK_CGROUP
- {
- .name = "avg_queue_size",
- .read_seq_string = blkcg_print_avg_queue_size,
- },
- {
- .name = "group_wait_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, group_wait_time)),
- .read_seq_string = blkcg_print_stat,
- },
- {
- .name = "idle_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, idle_time)),
- .read_seq_string = blkcg_print_stat,
- },
- {
- .name = "empty_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, empty_time)),
- .read_seq_string = blkcg_print_stat,
- },
- {
- .name = "dequeue",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, dequeue)),
- .read_seq_string = blkcg_print_stat,
- },
- {
- .name = "unaccounted_time",
- .private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
- offsetof(struct blkio_group_stats, unaccounted_time)),
- .read_seq_string = blkcg_print_stat,
- },
-#endif
{ } /* terminate */
};
if (!blkcg)
return ERR_PTR(-ENOMEM);
- blkcg->weight = BLKIO_WEIGHT_DEFAULT;
+ blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
spin_lock_init(&blkcg->lock);
*/
int blkcg_init_queue(struct request_queue *q)
{
- int ret;
-
might_sleep();
- ret = blk_throtl_init(q);
- if (ret)
- return ret;
-
- mutex_lock(&all_q_mutex);
- INIT_LIST_HEAD(&q->all_q_node);
- list_add_tail(&q->all_q_node, &all_q_list);
- mutex_unlock(&all_q_mutex);
-
- return 0;
+ return blk_throtl_init(q);
}
/**
*/
void blkcg_exit_queue(struct request_queue *q)
{
- mutex_lock(&all_q_mutex);
- list_del_init(&q->all_q_node);
- mutex_unlock(&all_q_mutex);
-
- blkg_destroy_all(q, true);
+ spin_lock_irq(q->queue_lock);
+ blkg_destroy_all(q);
+ spin_unlock_irq(q->queue_lock);
blk_throtl_exit(q);
}
return ret;
}
-static void blkcg_bypass_start(void)
- __acquires(&all_q_mutex)
-{
- struct request_queue *q;
-
- mutex_lock(&all_q_mutex);
-
- list_for_each_entry(q, &all_q_list, all_q_node) {
- blk_queue_bypass_start(q);
- blkg_destroy_all(q, false);
- }
-}
-
-static void blkcg_bypass_end(void)
- __releases(&all_q_mutex)
-{
- struct request_queue *q;
-
- list_for_each_entry(q, &all_q_list, all_q_node)
- blk_queue_bypass_end(q);
-
- mutex_unlock(&all_q_mutex);
-}
-
struct cgroup_subsys blkio_subsys = {
.name = "blkio",
.create = blkiocg_create,
};
EXPORT_SYMBOL_GPL(blkio_subsys);
-void blkio_policy_register(struct blkio_policy_type *blkiop)
+/**
+ * blkcg_activate_policy - activate a blkcg policy on a request_queue
+ * @q: request_queue of interest
+ * @pol: blkcg policy to activate
+ *
+ * Activate @pol on @q. Requires %GFP_KERNEL context. @q goes through
+ * bypass mode to populate its blkgs with policy_data for @pol.
+ *
+ * Activation happens with @q bypassed, so nobody would be accessing blkgs
+ * from IO path. Update of each blkg is protected by both queue and blkcg
+ * locks so that holding either lock and testing blkcg_policy_enabled() is
+ * always enough for dereferencing policy data.
+ *
+ * The caller is responsible for synchronizing [de]activations and policy
+ * [un]registerations. Returns 0 on success, -errno on failure.
+ */
+int blkcg_activate_policy(struct request_queue *q,
+ const struct blkio_policy_type *pol)
+{
+ LIST_HEAD(pds);
+ struct blkio_group *blkg;
+ struct blkg_policy_data *pd, *n;
+ int cnt = 0, ret;
+
+ if (blkcg_policy_enabled(q, pol))
+ return 0;
+
+ blk_queue_bypass_start(q);
+
+ /* make sure the root blkg exists and count the existing blkgs */
+ spin_lock_irq(q->queue_lock);
+
+ rcu_read_lock();
+ blkg = __blkg_lookup_create(&blkio_root_cgroup, q);
+ rcu_read_unlock();
+
+ if (IS_ERR(blkg)) {
+ ret = PTR_ERR(blkg);
+ goto out_unlock;
+ }
+ q->root_blkg = blkg;
+
+ list_for_each_entry(blkg, &q->blkg_list, q_node)
+ cnt++;
+
+ spin_unlock_irq(q->queue_lock);
+
+ /* allocate policy_data for all existing blkgs */
+ while (cnt--) {
+ pd = kzalloc_node(blkg_pd_size(pol), GFP_KERNEL, q->node);
+ if (!pd) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+ list_add_tail(&pd->alloc_node, &pds);
+ }
+
+ /*
+ * Install the allocated pds. With @q bypassing, no new blkg
+ * should have been created while the queue lock was dropped.
+ */
+ spin_lock_irq(q->queue_lock);
+
+ list_for_each_entry(blkg, &q->blkg_list, q_node) {
+ if (WARN_ON(list_empty(&pds))) {
+ /* umm... this shouldn't happen, just abort */
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+ pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
+ list_del_init(&pd->alloc_node);
+
+ /* grab blkcg lock too while installing @pd on @blkg */
+ spin_lock(&blkg->blkcg->lock);
+
+ blkg->pd[pol->plid] = pd;
+ pd->blkg = blkg;
+ pol->ops.blkio_init_group_fn(blkg);
+
+ spin_unlock(&blkg->blkcg->lock);
+ }
+
+ __set_bit(pol->plid, q->blkcg_pols);
+ ret = 0;
+out_unlock:
+ spin_unlock_irq(q->queue_lock);
+out_free:
+ blk_queue_bypass_end(q);
+ list_for_each_entry_safe(pd, n, &pds, alloc_node)
+ kfree(pd);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_activate_policy);
+
+/**
+ * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
+ * @q: request_queue of interest
+ * @pol: blkcg policy to deactivate
+ *
+ * Deactivate @pol on @q. Follows the same synchronization rules as
+ * blkcg_activate_policy().
+ */
+void blkcg_deactivate_policy(struct request_queue *q,
+ const struct blkio_policy_type *pol)
+{
+ struct blkio_group *blkg;
+
+ if (!blkcg_policy_enabled(q, pol))
+ return;
+
+ blk_queue_bypass_start(q);
+ spin_lock_irq(q->queue_lock);
+
+ __clear_bit(pol->plid, q->blkcg_pols);
+
+ /* if no policy is left, no need for blkgs - shoot them down */
+ if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS))
+ blkg_destroy_all(q);
+
+ list_for_each_entry(blkg, &q->blkg_list, q_node) {
+ /* grab blkcg lock too while removing @pd from @blkg */
+ spin_lock(&blkg->blkcg->lock);
+
+ if (pol->ops.blkio_exit_group_fn)
+ pol->ops.blkio_exit_group_fn(blkg);
+
+ kfree(blkg->pd[pol->plid]);
+ blkg->pd[pol->plid] = NULL;
+
+ spin_unlock(&blkg->blkcg->lock);
+ }
+
+ spin_unlock_irq(q->queue_lock);
+ blk_queue_bypass_end(q);
+}
+EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
+
+/**
+ * blkio_policy_register - register a blkcg policy
+ * @blkiop: blkcg policy to register
+ *
+ * Register @blkiop with blkcg core. Might sleep and @blkiop may be
+ * modified on successful registration. Returns 0 on success and -errno on
+ * failure.
+ */
+int blkio_policy_register(struct blkio_policy_type *blkiop)
{
- struct request_queue *q;
+ int i, ret;
- blkcg_bypass_start();
- spin_lock(&blkio_list_lock);
+ mutex_lock(&blkcg_pol_mutex);
- BUG_ON(blkio_policy[blkiop->plid]);
- blkio_policy[blkiop->plid] = blkiop;
- list_add_tail(&blkiop->list, &blkio_list);
+ /* find an empty slot */
+ ret = -ENOSPC;
+ for (i = 0; i < BLKCG_MAX_POLS; i++)
+ if (!blkio_policy[i])
+ break;
+ if (i >= BLKCG_MAX_POLS)
+ goto out_unlock;
- spin_unlock(&blkio_list_lock);
- list_for_each_entry(q, &all_q_list, all_q_node)
- update_root_blkg_pd(q, blkiop->plid);
- blkcg_bypass_end();
+ /* register and update blkgs */
+ blkiop->plid = i;
+ blkio_policy[i] = blkiop;
+ /* everything is in place, add intf files for the new policy */
if (blkiop->cftypes)
WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
+ ret = 0;
+out_unlock:
+ mutex_unlock(&blkcg_pol_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(blkio_policy_register);
+/**
+ * blkiop_policy_unregister - unregister a blkcg policy
+ * @blkiop: blkcg policy to unregister
+ *
+ * Undo blkio_policy_register(@blkiop). Might sleep.
+ */
void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
- struct request_queue *q;
+ mutex_lock(&blkcg_pol_mutex);
+ if (WARN_ON(blkio_policy[blkiop->plid] != blkiop))
+ goto out_unlock;
+
+ /* kill the intf files first */
if (blkiop->cftypes)
cgroup_rm_cftypes(&blkio_subsys, blkiop->cftypes);
- blkcg_bypass_start();
- spin_lock(&blkio_list_lock);
-
- BUG_ON(blkio_policy[blkiop->plid] != blkiop);
+ /* unregister and update blkgs */
blkio_policy[blkiop->plid] = NULL;
- list_del_init(&blkiop->list);
-
- spin_unlock(&blkio_list_lock);
- list_for_each_entry(q, &all_q_list, all_q_node)
- update_root_blkg_pd(q, blkiop->plid);
- blkcg_bypass_end();
+out_unlock:
+ mutex_unlock(&blkcg_pol_mutex);
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);