/* A workqueue to queue throttle related work */
static struct workqueue_struct *kthrotld_workqueue;
-static void throtl_schedule_delayed_work(struct throtl_data *td,
- unsigned long delay);
-struct throtl_rb_root {
- struct rb_root rb;
- struct rb_node *left;
- unsigned int count;
- unsigned long min_disptime;
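+/* per-parent queue of pending tgs, kept sorted by tg->disptime */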
+struct throtl_service_queue {
+ struct rb_root pending_tree; /* RB tree of active tgs */
+ struct rb_node *first_pending; /* first node in the tree */
+ unsigned int nr_pending; /* # queued in the tree */
+ unsigned long first_pending_disptime; /* disptime of the first tg */
};
-#define THROTL_RB_ROOT (struct throtl_rb_root) { .rb = RB_ROOT, .left = NULL, \
- .count = 0, .min_disptime = 0}
+enum tg_state_flags {
+ THROTL_TG_PENDING = 1 << 0, /* on parent's pending tree */
+};
#define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node)
/* must be the first member */
struct blkg_policy_data pd;
- /* active throtl group service_tree member */
+ /* active throtl group service_queue member */
struct rb_node rb_node;
+ /* throtl_data this group belongs to */
+ struct throtl_data *td;
+
+ /* this group's service queue */
+ struct throtl_service_queue service_queue;
+
/*
* Dispatch time in jiffies. This is the estimated time when group
* will unthrottle and is ready to dispatch more bio. It is used as
unsigned long slice_start[2];
unsigned long slice_end[2];
- /* Some throttle limits got updated for the group */
- int limits_changed;
-
/* Per cpu stats pointer */
struct tg_stats_cpu __percpu *stats_cpu;
struct throtl_data
{
/* service tree for active throtl groups */
- struct throtl_rb_root tg_service_tree;
+ struct throtl_service_queue service_queue;
struct request_queue *queue;
unsigned int nr_undestroyed_grps;
/* Work for dispatching throttled bios */
- struct delayed_work throtl_work;
-
- int limits_changed;
+ struct delayed_work dispatch_work;
};
/* list and work item to allocate percpu group stats */
return blkg_to_tg(td->queue->root_blkg);
}
-enum tg_state_flags {
- THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */
-};
-
-#define THROTL_TG_FNS(name) \
-static inline void throtl_mark_tg_##name(struct throtl_grp *tg) \
-{ \
- (tg)->flags |= (1 << THROTL_TG_FLAG_##name); \
-} \
-static inline void throtl_clear_tg_##name(struct throtl_grp *tg) \
-{ \
- (tg)->flags &= ~(1 << THROTL_TG_FLAG_##name); \
-} \
-static inline int throtl_tg_##name(const struct throtl_grp *tg) \
-{ \
- return ((tg)->flags & (1 << THROTL_TG_FLAG_##name)) != 0; \
-}
-
-THROTL_TG_FNS(on_rr);
-
-#define throtl_log_tg(td, tg, fmt, args...) do { \
+#define throtl_log_tg(tg, fmt, args...) do { \
char __pbuf[128]; \
\
blkg_path(tg_to_blkg(tg), __pbuf, sizeof(__pbuf)); \
- blk_add_trace_msg((td)->queue, "throtl %s " fmt, __pbuf, ##args); \
+ blk_add_trace_msg((tg)->td->queue, "throtl %s " fmt, __pbuf, ##args); \
} while (0)
#define throtl_log(td, fmt, args...) \
blk_add_trace_msg((td)->queue, "throtl " fmt, ##args)
-static inline unsigned int total_nr_queued(struct throtl_data *td)
-{
- return td->nr_queued[0] + td->nr_queued[1];
-}
-
/*
* Worker for allocating per cpu stat for tgs. This is scheduled on the
* system_wq once there are some groups on the alloc_list waiting for
goto alloc_stats;
}
+/* init a service_queue, assumes the caller zeroed it */
+static void throtl_service_queue_init(struct throtl_service_queue *sq)
+{
+ sq->pending_tree = RB_ROOT;
+}
+
static void throtl_pd_init(struct blkcg_gq *blkg)
{
struct throtl_grp *tg = blkg_to_tg(blkg);
unsigned long flags;
+ throtl_service_queue_init(&tg->service_queue);
RB_CLEAR_NODE(&tg->rb_node);
+ tg->td = blkg->q->td;
bio_list_init(&tg->bio_lists[0]);
bio_list_init(&tg->bio_lists[1]);
- tg->limits_changed = false;
tg->bps[READ] = -1;
tg->bps[WRITE] = -1;
return tg;
}
-static struct throtl_grp *throtl_rb_first(struct throtl_rb_root *root)
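+/* return the pending tg with the earliest disptime in @parent_sq, if any */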
+static struct throtl_grp *
+throtl_rb_first(struct throtl_service_queue *parent_sq)
{
/* Service tree is empty */
- if (!root->count)
+ if (!parent_sq->nr_pending)
return NULL;
- if (!root->left)
- root->left = rb_first(&root->rb);
+ if (!parent_sq->first_pending)
+ parent_sq->first_pending = rb_first(&parent_sq->pending_tree);
- if (root->left)
- return rb_entry_tg(root->left);
+ if (parent_sq->first_pending)
+ return rb_entry_tg(parent_sq->first_pending);
return NULL;
}
RB_CLEAR_NODE(n);
}
-static void throtl_rb_erase(struct rb_node *n, struct throtl_rb_root *root)
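+/* remove @n from @parent_sq's pending tree */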
+static void throtl_rb_erase(struct rb_node *n,
+ struct throtl_service_queue *parent_sq)
{
- if (root->left == n)
- root->left = NULL;
- rb_erase_init(n, &root->rb);
- --root->count;
+ if (parent_sq->first_pending == n)
+ parent_sq->first_pending = NULL;
+ rb_erase_init(n, &parent_sq->pending_tree);
+ --parent_sq->nr_pending;
}
-static void update_min_dispatch_time(struct throtl_rb_root *st)
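+/* cache the disptime of the first pending tg in @parent_sq */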
+static void update_min_dispatch_time(struct throtl_service_queue *parent_sq)
{
struct throtl_grp *tg;
- tg = throtl_rb_first(st);
+ tg = throtl_rb_first(parent_sq);
if (!tg)
return;
- st->min_disptime = tg->disptime;
+ parent_sq->first_pending_disptime = tg->disptime;
}
-static void
-tg_service_tree_add(struct throtl_rb_root *st, struct throtl_grp *tg)
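+/* insert @tg into @parent_sq's pending tree, keyed by tg->disptime */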
+static void tg_service_queue_add(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
- struct rb_node **node = &st->rb.rb_node;
+ struct rb_node **node = &parent_sq->pending_tree.rb_node;
struct rb_node *parent = NULL;
struct throtl_grp *__tg;
unsigned long key = tg->disptime;
}
if (left)
- st->left = &tg->rb_node;
+ parent_sq->first_pending = &tg->rb_node;
rb_link_node(&tg->rb_node, parent, node);
- rb_insert_color(&tg->rb_node, &st->rb);
+ rb_insert_color(&tg->rb_node, &parent_sq->pending_tree);
}
-static void __throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg)
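+/* link @tg into @parent_sq's pending tree and mark it pending */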
+static void __throtl_enqueue_tg(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
- struct throtl_rb_root *st = &td->tg_service_tree;
+ tg_service_queue_add(tg, parent_sq);
+ tg->flags |= THROTL_TG_PENDING;
+ parent_sq->nr_pending++;
+}
- tg_service_tree_add(st, tg);
- throtl_mark_tg_on_rr(tg);
- st->count++;
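+/* enqueue @tg on @parent_sq unless it is already pending */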
+static void throtl_enqueue_tg(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
+{
+ if (!(tg->flags & THROTL_TG_PENDING))
+ __throtl_enqueue_tg(tg, parent_sq);
}
-static void throtl_enqueue_tg(struct throtl_data *td, struct throtl_grp *tg)
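+/* unlink @tg from @parent_sq's pending tree and clear the pending flag */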
+static void __throtl_dequeue_tg(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
- if (!throtl_tg_on_rr(tg))
- __throtl_enqueue_tg(td, tg);
+ throtl_rb_erase(&tg->rb_node, parent_sq);
+ tg->flags &= ~THROTL_TG_PENDING;
}
-static void __throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg)
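+/* dequeue @tg from @parent_sq if it is currently pending */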
+static void throtl_dequeue_tg(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
- throtl_rb_erase(&tg->rb_node, &td->tg_service_tree);
- throtl_clear_tg_on_rr(tg);
+ if (tg->flags & THROTL_TG_PENDING)
+ __throtl_dequeue_tg(tg, parent_sq);
}
-static void throtl_dequeue_tg(struct throtl_data *td, struct throtl_grp *tg)
+/* Call with queue lock held */
+static void throtl_schedule_delayed_work(struct throtl_data *td,
+ unsigned long delay)
{
- if (throtl_tg_on_rr(tg))
- __throtl_dequeue_tg(td, tg);
+ struct delayed_work *dwork = &td->dispatch_work;
+
+ mod_delayed_work(kthrotld_workqueue, dwork, delay);
+ throtl_log(td, "schedule work. delay=%lu jiffies=%lu", delay, jiffies);
}
static void throtl_schedule_next_dispatch(struct throtl_data *td)
{
- struct throtl_rb_root *st = &td->tg_service_tree;
+ struct throtl_service_queue *sq = &td->service_queue;
- /*
- * If there are more bios pending, schedule more work.
- */
- if (!total_nr_queued(td))
+ /* any pending children left? */
+ if (!sq->nr_pending)
return;
- BUG_ON(!st->count);
+ update_min_dispatch_time(sq);
- update_min_dispatch_time(st);
-
- if (time_before_eq(st->min_disptime, jiffies))
+ if (time_before_eq(sq->first_pending_disptime, jiffies))
throtl_schedule_delayed_work(td, 0);
else
- throtl_schedule_delayed_work(td, (st->min_disptime - jiffies));
+ throtl_schedule_delayed_work(td, sq->first_pending_disptime - jiffies);
}
-static inline void
-throtl_start_new_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+static inline void throtl_start_new_slice(struct throtl_grp *tg, bool rw)
{
tg->bytes_disp[rw] = 0;
tg->io_disp[rw] = 0;
tg->slice_start[rw] = jiffies;
tg->slice_end[rw] = jiffies + throtl_slice;
- throtl_log_tg(td, tg, "[%c] new slice start=%lu end=%lu jiffies=%lu",
+ throtl_log_tg(tg, "[%c] new slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
tg->slice_end[rw], jiffies);
}
-static inline void throtl_set_slice_end(struct throtl_data *td,
- struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+static inline void throtl_set_slice_end(struct throtl_grp *tg, bool rw,
+ unsigned long jiffy_end)
{
tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
}
-static inline void throtl_extend_slice(struct throtl_data *td,
- struct throtl_grp *tg, bool rw, unsigned long jiffy_end)
+static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw,
+ unsigned long jiffy_end)
{
tg->slice_end[rw] = roundup(jiffy_end, throtl_slice);
- throtl_log_tg(td, tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu",
+ throtl_log_tg(tg, "[%c] extend slice start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', tg->slice_start[rw],
tg->slice_end[rw], jiffies);
}
/* Determine if previously allocated or extended slice is complete or not */
-static bool
-throtl_slice_used(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+static bool throtl_slice_used(struct throtl_grp *tg, bool rw)
{
if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw]))
return 0;
}
/* Trim the used slices and adjust slice start accordingly */
-static inline void
-throtl_trim_slice(struct throtl_data *td, struct throtl_grp *tg, bool rw)
+static inline void throtl_trim_slice(struct throtl_grp *tg, bool rw)
{
unsigned long nr_slices, time_elapsed, io_trim;
u64 bytes_trim, tmp;
* renewed. Don't try to trim the slice if slice is used. A new
* slice will start when appropriate.
*/
- if (throtl_slice_used(td, tg, rw))
+ if (throtl_slice_used(tg, rw))
return;
/*
* is bad because it does not allow new slice to start.
*/
- throtl_set_slice_end(td, tg, rw, jiffies + throtl_slice);
+ throtl_set_slice_end(tg, rw, jiffies + throtl_slice);
time_elapsed = jiffies - tg->slice_start[rw];
tg->slice_start[rw] += nr_slices * throtl_slice;
- throtl_log_tg(td, tg, "[%c] trim slice nr=%lu bytes=%llu io=%lu"
+ throtl_log_tg(tg, "[%c] trim slice nr=%lu bytes=%llu io=%lu"
" start=%lu end=%lu jiffies=%lu",
rw == READ ? 'R' : 'W', nr_slices, bytes_trim, io_trim,
tg->slice_start[rw], tg->slice_end[rw], jiffies);
}
-static bool tg_with_in_iops_limit(struct throtl_data *td, struct throtl_grp *tg,
- struct bio *bio, unsigned long *wait)
+static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio,
+ unsigned long *wait)
{
bool rw = bio_data_dir(bio);
unsigned int io_allowed;
return 0;
}
-static bool tg_with_in_bps_limit(struct throtl_data *td, struct throtl_grp *tg,
- struct bio *bio, unsigned long *wait)
+static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
+ unsigned long *wait)
{
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp;
* Returns whether one can dispatch a bio or not. Also returns approx number
* of jiffies to wait before this bio is with-in IO rate and can be dispatched
*/
-static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg,
- struct bio *bio, unsigned long *wait)
+static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
+ unsigned long *wait)
{
bool rw = bio_data_dir(bio);
unsigned long bps_wait = 0, iops_wait = 0, max_wait = 0;
* existing slice to make sure it is at least throtl_slice interval
* long since now.
*/
- if (throtl_slice_used(td, tg, rw))
- throtl_start_new_slice(td, tg, rw);
+ if (throtl_slice_used(tg, rw))
+ throtl_start_new_slice(tg, rw);
else {
if (time_before(tg->slice_end[rw], jiffies + throtl_slice))
- throtl_extend_slice(td, tg, rw, jiffies + throtl_slice);
+ throtl_extend_slice(tg, rw, jiffies + throtl_slice);
}
- if (tg_with_in_bps_limit(td, tg, bio, &bps_wait)
- && tg_with_in_iops_limit(td, tg, bio, &iops_wait)) {
+ if (tg_with_in_bps_limit(tg, bio, &bps_wait) &&
+ tg_with_in_iops_limit(tg, bio, &iops_wait)) {
if (wait)
*wait = 0;
return 1;
*wait = max_wait;
if (time_before(tg->slice_end[rw], jiffies + max_wait))
- throtl_extend_slice(td, tg, rw, jiffies + max_wait);
+ throtl_extend_slice(tg, rw, jiffies + max_wait);
return 0;
}
throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw);
}
-static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg,
- struct bio *bio)
+static void throtl_add_bio_tg(struct bio *bio, struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
bool rw = bio_data_dir(bio);
/* Take a bio reference on tg */
blkg_get(tg_to_blkg(tg));
tg->nr_queued[rw]++;
- td->nr_queued[rw]++;
- throtl_enqueue_tg(td, tg);
+ tg->td->nr_queued[rw]++;
+ throtl_enqueue_tg(tg, parent_sq);
}
-static void tg_update_disptime(struct throtl_data *td, struct throtl_grp *tg)
+static void tg_update_disptime(struct throtl_grp *tg,
+ struct throtl_service_queue *parent_sq)
{
unsigned long read_wait = -1, write_wait = -1, min_wait = -1, disptime;
struct bio *bio;
if ((bio = bio_list_peek(&tg->bio_lists[READ])))
- tg_may_dispatch(td, tg, bio, &read_wait);
+ tg_may_dispatch(tg, bio, &read_wait);
if ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
- tg_may_dispatch(td, tg, bio, &write_wait);
+ tg_may_dispatch(tg, bio, &write_wait);
min_wait = min(read_wait, write_wait);
disptime = jiffies + min_wait;
/* Update dispatch time */
- throtl_dequeue_tg(td, tg);
+ throtl_dequeue_tg(tg, parent_sq);
tg->disptime = disptime;
- throtl_enqueue_tg(td, tg);
+ throtl_enqueue_tg(tg, parent_sq);
}
-static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg,
- bool rw, struct bio_list *bl)
+static void tg_dispatch_one_bio(struct throtl_grp *tg, bool rw,
+ struct bio_list *bl)
{
struct bio *bio;
/* Drop bio reference on blkg */
blkg_put(tg_to_blkg(tg));
- BUG_ON(td->nr_queued[rw] <= 0);
- td->nr_queued[rw]--;
+ BUG_ON(tg->td->nr_queued[rw] <= 0);
+ tg->td->nr_queued[rw]--;
throtl_charge_bio(tg, bio);
bio_list_add(bl, bio);
bio->bi_rw |= REQ_THROTTLED;
- throtl_trim_slice(td, tg, rw);
+ throtl_trim_slice(tg, rw);
}
-static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
- struct bio_list *bl)
+static int throtl_dispatch_tg(struct throtl_grp *tg, struct bio_list *bl)
{
unsigned int nr_reads = 0, nr_writes = 0;
unsigned int max_nr_reads = throtl_grp_quantum*3/4;
/* Try to dispatch 75% READS and 25% WRITES */
- while ((bio = bio_list_peek(&tg->bio_lists[READ]))
- && tg_may_dispatch(td, tg, bio, NULL)) {
+ while ((bio = bio_list_peek(&tg->bio_lists[READ])) &&
+ tg_may_dispatch(tg, bio, NULL)) {
- tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl);
+ tg_dispatch_one_bio(tg, bio_data_dir(bio), bl);
nr_reads++;
if (nr_reads >= max_nr_reads)
break;
}
- while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))
- && tg_may_dispatch(td, tg, bio, NULL)) {
+ while ((bio = bio_list_peek(&tg->bio_lists[WRITE])) &&
+ tg_may_dispatch(tg, bio, NULL)) {
- tg_dispatch_one_bio(td, tg, bio_data_dir(bio), bl);
+ tg_dispatch_one_bio(tg, bio_data_dir(bio), bl);
nr_writes++;
if (nr_writes >= max_nr_writes)
return nr_reads + nr_writes;
}
-static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl)
+static int throtl_select_dispatch(struct throtl_service_queue *parent_sq,
+ struct bio_list *bl)
{
unsigned int nr_disp = 0;
struct throtl_grp *tg;
- struct throtl_rb_root *st = &td->tg_service_tree;
while (1) {
- tg = throtl_rb_first(st);
+ tg = throtl_rb_first(parent_sq);
if (!tg)
break;
if (time_before(jiffies, tg->disptime))
break;
- throtl_dequeue_tg(td, tg);
+ throtl_dequeue_tg(tg, parent_sq);
- nr_disp += throtl_dispatch_tg(td, tg, bl);
+ nr_disp += throtl_dispatch_tg(tg, bl);
- if (tg->nr_queued[0] || tg->nr_queued[1]) {
- tg_update_disptime(td, tg);
- throtl_enqueue_tg(td, tg);
- }
+ if (tg->nr_queued[0] || tg->nr_queued[1])
+ tg_update_disptime(tg, parent_sq);
if (nr_disp >= throtl_quantum)
break;
return nr_disp;
}
-static void throtl_process_limit_change(struct throtl_data *td)
+/* work function to dispatch throttled bios */
+void blk_throtl_dispatch_work_fn(struct work_struct *work)
{
+ struct throtl_data *td = container_of(to_delayed_work(work),
+ struct throtl_data, dispatch_work);
struct request_queue *q = td->queue;
- struct blkcg_gq *blkg, *n;
-
- if (!td->limits_changed)
- return;
-
- xchg(&td->limits_changed, false);
-
- throtl_log(td, "limits changed");
-
- list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
- struct throtl_grp *tg = blkg_to_tg(blkg);
-
- if (!tg->limits_changed)
- continue;
-
- if (!xchg(&tg->limits_changed, false))
- continue;
-
- throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu"
- " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE],
- tg->iops[READ], tg->iops[WRITE]);
-
- /*
- * Restart the slices for both READ and WRITES. It
- * might happen that a group's limit are dropped
- * suddenly and we don't want to account recently
- * dispatched IO with new low rate
- */
- throtl_start_new_slice(td, tg, 0);
- throtl_start_new_slice(td, tg, 1);
-
- if (throtl_tg_on_rr(tg))
- tg_update_disptime(td, tg);
- }
-}
-
-/* Dispatch throttled bios. Should be called without queue lock held. */
-static int throtl_dispatch(struct request_queue *q)
-{
- struct throtl_data *td = q->td;
unsigned int nr_disp = 0;
struct bio_list bio_list_on_stack;
struct bio *bio;
spin_lock_irq(q->queue_lock);
- throtl_process_limit_change(td);
-
- if (!total_nr_queued(td))
- goto out;
-
bio_list_init(&bio_list_on_stack);
throtl_log(td, "dispatch nr_queued=%u read=%u write=%u",
- total_nr_queued(td), td->nr_queued[READ],
- td->nr_queued[WRITE]);
+ td->nr_queued[READ] + td->nr_queued[WRITE],
+ td->nr_queued[READ], td->nr_queued[WRITE]);
- nr_disp = throtl_select_dispatch(td, &bio_list_on_stack);
+ nr_disp = throtl_select_dispatch(&td->service_queue, &bio_list_on_stack);
if (nr_disp)
throtl_log(td, "bios disp=%u", nr_disp);
throtl_schedule_next_dispatch(td);
-out:
+
spin_unlock_irq(q->queue_lock);
/*
generic_make_request(bio);
blk_finish_plug(&plug);
}
- return nr_disp;
-}
-
-void blk_throtl_work(struct work_struct *work)
-{
- struct throtl_data *td = container_of(work, struct throtl_data,
- throtl_work.work);
- struct request_queue *q = td->queue;
-
- throtl_dispatch(q);
-}
-
-/* Call with queue lock held */
-static void
-throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay)
-{
-
- struct delayed_work *dwork = &td->throtl_work;
-
- /* schedule work if limits changed even if no bio is queued */
- if (total_nr_queued(td) || td->limits_changed) {
- mod_delayed_work(kthrotld_workqueue, dwork, delay);
- throtl_log(td, "schedule work. delay=%lu jiffies=%lu",
- delay, jiffies);
- }
}
static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
else
*(unsigned int *)((void *)tg + cft->private) = ctx.v;
- /* XXX: we don't need the following deferred processing */
- xchg(&tg->limits_changed, true);
- xchg(&td->limits_changed, true);
- throtl_schedule_delayed_work(td, 0);
+ throtl_log_tg(tg, "limit change rbps=%llu wbps=%llu riops=%u wiops=%u",
+ tg->bps[READ], tg->bps[WRITE],
+ tg->iops[READ], tg->iops[WRITE]);
+
+ /*
+ * We're already holding queue_lock and know @tg is valid. Let's
+ * apply the new config directly.
+ *
+	 * Restart the slices for both READ and WRITE. It might happen
+	 * that a group's limits are dropped suddenly and we don't want to
+	 * account recently dispatched IO at the new, lower rate.
+ */
+ throtl_start_new_slice(tg, 0);
+ throtl_start_new_slice(tg, 1);
+
+ if (tg->flags & THROTL_TG_PENDING) {
+ tg_update_disptime(tg, &td->service_queue);
+ throtl_schedule_next_dispatch(td);
+ }
blkg_conf_finish(&ctx);
return 0;
{
struct throtl_data *td = q->td;
- cancel_delayed_work_sync(&td->throtl_work);
+ cancel_delayed_work_sync(&td->dispatch_work);
}
static struct blkcg_policy blkcg_policy_throtl = {
}
/* Bio is with-in rate limit of group */
- if (tg_may_dispatch(td, tg, bio, NULL)) {
+ if (tg_may_dispatch(tg, bio, NULL)) {
throtl_charge_bio(tg, bio);
/*
*
* So keep on trimming slice even if bio is not queued.
*/
- throtl_trim_slice(td, tg, rw);
+ throtl_trim_slice(tg, rw);
goto out_unlock;
}
queue_bio:
- throtl_log_tg(td, tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
+ throtl_log_tg(tg, "[%c] bio. bdisp=%llu sz=%u bps=%llu"
" iodisp=%u iops=%u queued=%d/%d",
rw == READ ? 'R' : 'W',
tg->bytes_disp[rw], bio->bi_size, tg->bps[rw],
tg->nr_queued[READ], tg->nr_queued[WRITE]);
bio_associate_current(bio);
- throtl_add_bio_tg(q->td, tg, bio);
+ throtl_add_bio_tg(bio, tg, &q->td->service_queue);
throttled = true;
if (update_disptime) {
- tg_update_disptime(td, tg);
+ tg_update_disptime(tg, &td->service_queue);
throtl_schedule_next_dispatch(td);
}
__releases(q->queue_lock) __acquires(q->queue_lock)
{
struct throtl_data *td = q->td;
- struct throtl_rb_root *st = &td->tg_service_tree;
+ struct throtl_service_queue *parent_sq = &td->service_queue;
struct throtl_grp *tg;
struct bio_list bl;
struct bio *bio;
bio_list_init(&bl);
- while ((tg = throtl_rb_first(st))) {
- throtl_dequeue_tg(td, tg);
+ while ((tg = throtl_rb_first(parent_sq))) {
+ throtl_dequeue_tg(tg, parent_sq);
while ((bio = bio_list_peek(&tg->bio_lists[READ])))
- tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+ tg_dispatch_one_bio(tg, bio_data_dir(bio), &bl);
while ((bio = bio_list_peek(&tg->bio_lists[WRITE])))
- tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl);
+ tg_dispatch_one_bio(tg, bio_data_dir(bio), &bl);
}
spin_unlock_irq(q->queue_lock);
if (!td)
return -ENOMEM;
- td->tg_service_tree = THROTL_RB_ROOT;
- td->limits_changed = false;
- INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
+ INIT_DELAYED_WORK(&td->dispatch_work, blk_throtl_dispatch_work_fn);
+ throtl_service_queue_init(&td->service_queue);
q->td = td;
td->queue = q;