]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - block/blk-cgroup.h
blkcg: restructure blkio_group configuration setting
[mirror_ubuntu-zesty-kernel.git] / block / blk-cgroup.h
CommitLineData
31e4c28d
VG
1#ifndef _BLK_CGROUP_H
2#define _BLK_CGROUP_H
3/*
4 * Common Block IO controller cgroup interface
5 *
6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
8 *
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it>
11 *
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com>
14 */
15
16#include <linux/cgroup.h>
575969a0 17#include <linux/u64_stats_sync.h>
31e4c28d 18
/*
 * Registered blkio policies.  The id indexes blkio_group->pd[] and
 * identifies each policy in configuration/stat interfaces.
 */
enum blkio_policy_id {
	BLKIO_POLICY_PROP = 0,		/* Proportional Bandwidth division */
	BLKIO_POLICY_THROTL,		/* Throttling */

	BLKIO_NR_POLICIES,
};

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX
#ifdef CONFIG_BLK_CGROUP

/*
 * cft->private [un]packing for stat printing: the policy id is stored in
 * the high 16 bits and the stat field offset in the low 16 bits of one
 * unsigned value.  Offsets must therefore fit in 16 bits.
 */
#define BLKCG_STAT_PRIV(pol, off)	(((unsigned)(pol) << 16) | (off))
#define BLKCG_STAT_POL(prv)		((unsigned)(prv) >> 16)
#define BLKCG_STAT_OFF(prv)		((unsigned)(prv) & 0xffff)
/*
 * Indices into blkg_rwstat->cnt[].  Each rwstat is split along two axes:
 * read/write and sync/async.  BLKG_RWSTAT_TOTAL aliases NR for interfaces
 * that report the combined value.
 */
enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

/* blkg state flags (bit positions in blkio_group_stats->flags) */
enum blkg_state_flags {
	BLKG_waiting = 0,
	BLKG_idling,
	BLKG_empty,
};
/* Per-cgroup state of the block IO controller. */
struct blkio_cgroup {
	struct cgroup_subsys_state css;
	unsigned int weight;		/* proportional-bandwidth weight */
	spinlock_t lock;		/* presumably protects blkg_list — confirm at use sites */
	struct hlist_head blkg_list;	/* blkio_groups belonging to this blkcg */

	/* for policies to test whether associated blkcg has changed */
	uint64_t id;
};
/* A single u64 counter; syncp guards against torn 64bit reads on 32bit. */
struct blkg_stat {
	struct u64_stats_sync syncp;
	uint64_t cnt;
};

/* A counter split by direction and sync-ness; see enum blkg_rwstat_type. */
struct blkg_rwstat {
	struct u64_stats_sync syncp;
	uint64_t cnt[BLKG_RWSTAT_NR];
};
/* Per-blkg per-policy statistics, updated under the queue lock. */
struct blkio_group_stats {
	/* number of ios merged */
	struct blkg_rwstat		merged;
	/* total time spent on device in ns, may not be accurate w/ queueing */
	struct blkg_rwstat		service_time;
	/* total time spent waiting in scheduler queue in ns */
	struct blkg_rwstat		wait_time;
	/* number of IOs queued up */
	struct blkg_rwstat		queued;
	/* total disk time and nr sectors dispatched by this group */
	struct blkg_stat		time;
#ifdef CONFIG_DEBUG_BLK_CGROUP
	/* time not charged to this cgroup */
	struct blkg_stat		unaccounted_time;
	/* sum of number of ios queued across all samples */
	struct blkg_stat		avg_queue_size_sum;
	/* count of samples taken for average */
	struct blkg_stat		avg_queue_size_samples;
	/* how many times this group has been removed from service tree */
	struct blkg_stat		dequeue;
	/* total time spent waiting for it to be assigned a timeslice. */
	struct blkg_stat		group_wait_time;
	/* time spent idling for this blkio_group */
	struct blkg_stat		idle_time;
	/* total time with empty current active q with other requests queued */
	struct blkg_stat		empty_time;
	/* fields after this shouldn't be cleared on stat reset */
	uint64_t			start_group_wait_time;
	uint64_t			start_idle_time;
	uint64_t			start_empty_time;
	uint16_t			flags;	/* BLKG_* state bits, see enum blkg_state_flags */
#endif
};
/* Per cpu blkio group stats */
struct blkio_group_stats_cpu {
	/* total bytes transferred */
	struct blkg_rwstat		service_bytes;
	/* total IOs serviced, post merge */
	struct blkg_rwstat		serviced;
	/* total sectors transferred */
	struct blkg_stat		sectors;
};
/*
 * Per-blkg per-policy configuration.  iops[]/bps[] are two-element arrays
 * — presumably indexed by direction (READ/WRITE); confirm at use sites.
 */
struct blkio_group_conf {
	unsigned int weight;
	u64 iops[2];
	u64 bps[2];
};
/* per-blkg per-policy data */
struct blkg_policy_data {
	/* the blkg this per-policy data belongs to */
	struct blkio_group *blkg;

	/* Configuration */
	struct blkio_group_conf conf;

	struct blkio_group_stats stats;
	/* Per cpu stats pointer */
	struct blkio_group_stats_cpu __percpu *stats_cpu;

	/* pol->pdata_size bytes of private data used by policy impl */
	char pdata[] __aligned(__alignof__(unsigned long long));
};
/* An association between a cgroup and a request_queue. */
struct blkio_group {
	/* Pointer to the associated request_queue */
	struct request_queue *q;
	struct list_head q_node;	/* presumably links blkgs of the same queue — confirm */
	struct hlist_node blkcg_node;	/* presumably on blkcg->blkg_list — confirm */
	struct blkio_cgroup *blkcg;	/* owning cgroup */
	/* Store cgroup path */
	char path[128];
	/* reference count */
	int refcnt;

	/* per-policy data, indexed by enum blkio_policy_id */
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];

	/* List of blkg waiting for per cpu stats memory to be allocated */
	struct list_head alloc_node;
	struct rcu_head rcu_head;
};
/*
 * Policy callback prototypes.  init is invoked when a blkg is created;
 * the update_* callbacks notify a policy that the corresponding
 * configuration value for (q, blkg) changed.
 */
typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
typedef void (blkio_update_group_weight_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int weight);
typedef void (blkio_update_group_read_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 read_bps);
typedef void (blkio_update_group_write_bps_fn)(struct request_queue *q,
			struct blkio_group *blkg, u64 write_bps);
typedef void (blkio_update_group_read_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int read_iops);
typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
			struct blkio_group *blkg, unsigned int write_iops);
/* Callback table a policy supplies; any entry may be NULL. */
struct blkio_policy_ops {
	blkio_init_group_fn *blkio_init_group_fn;
	blkio_update_group_weight_fn *blkio_update_group_weight_fn;
	blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
	blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
	blkio_update_group_read_iops_fn *blkio_update_group_read_iops_fn;
	blkio_update_group_write_iops_fn *blkio_update_group_write_iops_fn;
};

/* One registered blkio policy (e.g. proportional weight, throttling). */
struct blkio_policy_type {
	struct list_head list;		/* entry on the global policy list */
	struct blkio_policy_ops ops;
	enum blkio_policy_id plid;
	size_t pdata_size;		/* policy specific private data size */
};
/* request_queue lifetime hooks */
extern int blkcg_init_queue(struct request_queue *q);
extern void blkcg_drain_queue(struct request_queue *q);
extern void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
extern void blkio_policy_register(struct blkio_policy_type *);
extern void blkio_policy_unregister(struct blkio_policy_type *);
extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
extern void update_root_blkg_pd(struct request_queue *q,
				enum blkio_policy_id plid);
0381411e
TH
196/**
197 * blkg_to_pdata - get policy private data
198 * @blkg: blkg of interest
199 * @pol: policy of interest
200 *
201 * Return pointer to private data associated with the @blkg-@pol pair.
202 */
203static inline void *blkg_to_pdata(struct blkio_group *blkg,
204 struct blkio_policy_type *pol)
205{
549d3aa8 206 return blkg ? blkg->pd[pol->plid]->pdata : NULL;
0381411e
TH
207}
208
209/**
210 * pdata_to_blkg - get blkg associated with policy private data
211 * @pdata: policy private data of interest
0381411e 212 *
aaec55a0 213 * @pdata is policy private data. Determine the blkg it's associated with.
0381411e 214 */
aaec55a0 215static inline struct blkio_group *pdata_to_blkg(void *pdata)
0381411e
TH
216{
217 if (pdata) {
218 struct blkg_policy_data *pd =
219 container_of(pdata, struct blkg_policy_data, pdata);
220 return pd->blkg;
221 }
222 return NULL;
223}
224
afc24d49
VG
225static inline char *blkg_path(struct blkio_group *blkg)
226{
227 return blkg->path;
228}
229
/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding queue_lock and an existing reference.
 */
static inline void blkg_get(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	/* taking a ref on a zero-ref blkg is a bug — it may already be dying */
	WARN_ON_ONCE(!blkg->refcnt);
	blkg->refcnt++;
}

void __blkg_release(struct blkio_group *blkg);
/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 *
 * The caller should be holding queue_lock.
 */
static inline void blkg_put(struct blkio_group *blkg)
{
	lockdep_assert_held(blkg->q->queue_lock);
	WARN_ON_ONCE(blkg->refcnt <= 0);
	/* on the last reference, hand off to __blkg_release() for teardown */
	if (!--blkg->refcnt)
		__blkg_release(blkg);
}
/**
 * blkg_stat_add - add a value to a blkg_stat
 * @stat: target blkg_stat
 * @val: value to add
 *
 * Add @val to @stat.  The caller is responsible for synchronizing calls to
 * this function.
 */
static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
{
	/* begin/end bracket makes the 64bit update appear atomic to readers */
	u64_stats_update_begin(&stat->syncp);
	stat->cnt += val;
	u64_stats_update_end(&stat->syncp);
}
/**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
 *
 * Read the current value of @stat.  This function can be called without
 * synchroniztion and takes care of u64 atomicity.
 */
static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
{
	unsigned int start;
	uint64_t v;

	/* seqcount-style retry loop: re-read if a writer raced with us */
	do {
		start = u64_stats_fetch_begin(&stat->syncp);
		v = stat->cnt;
	} while (u64_stats_fetch_retry(&stat->syncp, start));

	return v;
}
294/**
295 * blkg_stat_reset - reset a blkg_stat
296 * @stat: blkg_stat to reset
297 */
298static inline void blkg_stat_reset(struct blkg_stat *stat)
299{
300 stat->cnt = 0;
301}
302
/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @rw: mask of REQ_{WRITE|SYNC}
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @rw.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   int rw, uint64_t val)
{
	u64_stats_update_begin(&rwstat->syncp);

	/* exactly one of READ/WRITE and one of SYNC/ASYNC is bumped */
	if (rw & REQ_WRITE)
		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
	else
		rwstat->cnt[BLKG_RWSTAT_READ] += val;
	if (rw & REQ_SYNC)
		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
	else
		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;

	u64_stats_update_end(&rwstat->syncp);
}
329/**
330 * blkg_rwstat_read - read the current values of a blkg_rwstat
331 * @rwstat: blkg_rwstat to read
332 *
333 * Read the current snapshot of @rwstat and return it as the return value.
334 * This function can be called without synchronization and takes care of
335 * u64 atomicity.
336 */
337static struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
338{
339 unsigned int start;
340 struct blkg_rwstat tmp;
341
342 do {
343 start = u64_stats_fetch_begin(&rwstat->syncp);
344 tmp = *rwstat;
345 } while (u64_stats_fetch_retry(&rwstat->syncp, start));
346
347 return tmp;
348}
349
350/**
351 * blkg_rwstat_sum - read the total count of a blkg_rwstat
352 * @rwstat: blkg_rwstat to read
353 *
354 * Return the total count of @rwstat regardless of the IO direction. This
355 * function can be called without synchronization and takes care of u64
356 * atomicity.
357 */
358static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat *rwstat)
359{
360 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
361
362 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
363}
364
365/**
366 * blkg_rwstat_reset - reset a blkg_rwstat
367 * @rwstat: blkg_rwstat to reset
368 */
369static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
370{
371 memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
372}
373
2f5ea477
JA
374#else
375
376struct blkio_group {
377};
378
3e252066
VG
379struct blkio_policy_type {
380};
381
5efd6113
TH
382static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
383static inline void blkcg_drain_queue(struct request_queue *q) { }
384static inline void blkcg_exit_queue(struct request_queue *q) { }
3e252066
VG
385static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
386static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
03aa264a 387static inline void blkg_destroy_all(struct request_queue *q,
03aa264a 388 bool destory_root) { }
e8989fae
TH
389static inline void update_root_blkg_pd(struct request_queue *q,
390 enum blkio_policy_id plid) { }
3e252066 391
0381411e
TH
392static inline void *blkg_to_pdata(struct blkio_group *blkg,
393 struct blkio_policy_type *pol) { return NULL; }
394static inline struct blkio_group *pdata_to_blkg(void *pdata,
395 struct blkio_policy_type *pol) { return NULL; }
afc24d49 396static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
1adaf3dd
TH
397static inline void blkg_get(struct blkio_group *blkg) { }
398static inline void blkg_put(struct blkio_group *blkg) { }
afc24d49 399
2f5ea477
JA
400#endif
/* valid range and default for the proportional weight interface */
#define BLKIO_WEIGHT_MIN	10
#define BLKIO_WEIGHT_MAX	1000
#define BLKIO_WEIGHT_DEFAULT	500
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* debug-only stat updaters, implemented in blk-cgroup.c */
void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol);
void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue);
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol);
void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol);
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol);

/*
 * Generate mark/clear/test accessors for each BLKG_* flag bit in
 * blkio_group_stats->flags, e.g. blkio_mark_blkg_waiting().
 */
#define BLKG_FLAG_FNS(name)						\
static inline void blkio_mark_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags |= (1 << BLKG_##name);				\
}									\
static inline void blkio_clear_blkg_##name(				\
		struct blkio_group_stats *stats)			\
{									\
	stats->flags &= ~(1 << BLKG_##name);				\
}									\
static inline int blkio_blkg_##name(struct blkio_group_stats *stats)	\
{									\
	return (stats->flags & (1 << BLKG_##name)) != 0;		\
}									\

BLKG_FLAG_FNS(waiting)
BLKG_FLAG_FNS(idling)
BLKG_FLAG_FNS(empty)
#undef BLKG_FLAG_FNS
#else	/* !CONFIG_DEBUG_BLK_CGROUP: no-op stubs */
static inline void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
		struct blkio_policy_type *pol) { }
static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
		struct blkio_policy_type *pol, unsigned long dequeue) { }
static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
		struct blkio_policy_type *pol) { }
static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
		struct blkio_policy_type *pol) { }
static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
		struct blkio_policy_type *pol) { }
#endif
#ifdef CONFIG_BLK_CGROUP
/* lookup/creation and stat-update entry points, defined in blk-cgroup.c */
extern struct blkio_cgroup blkio_root_cgroup;
extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
extern struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio);
extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				       struct request_queue *q);
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root);
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time);
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync);
void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync);
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync);
void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync);
31e4c28d 483#else
2f5ea477 484struct cgroup;
31e4c28d
VG
485static inline struct blkio_cgroup *
486cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
70087dc3 487static inline struct blkio_cgroup *
4f85cb96 488bio_blkio_cgroup(struct bio *bio) { return NULL; }
31e4c28d 489
cd1604fa
TH
490static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
491 void *key) { return NULL; }
303a3acb 492static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
c1768268
TH
493 struct blkio_policy_type *pol, unsigned long time,
494 unsigned long unaccounted_time) { }
84c124da 495static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
c1768268
TH
496 struct blkio_policy_type *pol, uint64_t bytes,
497 bool direction, bool sync) { }
84c124da 498static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
c1768268
TH
499 struct blkio_policy_type *pol, uint64_t start_time,
500 uint64_t io_start_time, bool direction, bool sync) { }
812d4026 501static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
c1768268
TH
502 struct blkio_policy_type *pol, bool direction,
503 bool sync) { }
a11cdaa7 504static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
c1768268
TH
505 struct blkio_policy_type *pol,
506 struct blkio_group *curr_blkg, bool direction,
507 bool sync) { }
a11cdaa7 508static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
c1768268
TH
509 struct blkio_policy_type *pol, bool direction,
510 bool sync) { }
31e4c28d
VG
511#endif
512#endif /* _BLK_CGROUP_H */