4 * Common Block IO controller cgroup interface
6 * Based on ideas and code from CFQ, CFS and BFQ:
7 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
9 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
10 * Paolo Valente <paolo.valente@unimore.it>
12 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
13 * Nauman Rafique <nauman@google.com>
16 #include <linux/cgroup.h>
17 #include <linux/u64_stats_sync.h>
18 #include <linux/seq_file.h>
20 enum blkio_policy_id
{
21 BLKIO_POLICY_PROP
= 0, /* Proportional Bandwidth division */
22 BLKIO_POLICY_THROTL
, /* Throttling */
27 /* Max limits for throttle policy */
28 #define THROTL_IOPS_MAX UINT_MAX
30 #ifdef CONFIG_BLK_CGROUP
/*
 * Packing helpers for cft->private, used when printing stats.
 * The policy value sits above the low 16 bits; the @off value occupies
 * the low 16 bits.
 */
#define BLKCG_STAT_PRIV(pol, off)	((off) | ((unsigned)(pol) << 16))
#define BLKCG_STAT_POL(prv)		((unsigned)(prv) >> 16)
#define BLKCG_STAT_OFF(prv)		(0xffff & (unsigned)(prv))
37 enum blkg_rwstat_type
{
44 BLKG_RWSTAT_TOTAL
= BLKG_RWSTAT_NR
,
47 /* blkg state flags */
48 enum blkg_state_flags
{
55 struct cgroup_subsys_state css
;
58 struct hlist_head blkg_list
;
60 /* for policies to test whether associated blkcg has changed */
65 struct u64_stats_sync syncp
;
70 struct u64_stats_sync syncp
;
71 uint64_t cnt
[BLKG_RWSTAT_NR
];
74 struct blkio_group_stats
{
75 /* number of ios merged */
76 struct blkg_rwstat merged
;
77 /* total time spent on device in ns, may not be accurate w/ queueing */
78 struct blkg_rwstat service_time
;
79 /* total time spent waiting in scheduler queue in ns */
80 struct blkg_rwstat wait_time
;
81 /* number of IOs queued up */
82 struct blkg_rwstat queued
;
83 /* total disk time and nr sectors dispatched by this group */
84 struct blkg_stat time
;
85 #ifdef CONFIG_DEBUG_BLK_CGROUP
86 /* time not charged to this cgroup */
87 struct blkg_stat unaccounted_time
;
88 /* sum of number of ios queued across all samples */
89 struct blkg_stat avg_queue_size_sum
;
90 /* count of samples taken for average */
91 struct blkg_stat avg_queue_size_samples
;
92 /* how many times this group has been removed from service tree */
93 struct blkg_stat dequeue
;
94 /* total time spent waiting for it to be assigned a timeslice. */
95 struct blkg_stat group_wait_time
;
96 /* time spent idling for this blkio_group */
97 struct blkg_stat idle_time
;
98 /* total time with empty current active q with other requests queued */
99 struct blkg_stat empty_time
;
100 /* fields after this shouldn't be cleared on stat reset */
101 uint64_t start_group_wait_time
;
102 uint64_t start_idle_time
;
103 uint64_t start_empty_time
;
108 /* Per cpu blkio group stats */
109 struct blkio_group_stats_cpu
{
110 /* total bytes transferred */
111 struct blkg_rwstat service_bytes
;
112 /* total IOs serviced, post merge */
113 struct blkg_rwstat serviced
;
114 /* total sectors transferred */
115 struct blkg_stat sectors
;
118 struct blkio_group_conf
{
124 /* per-blkg per-policy data */
125 struct blkg_policy_data
{
126 /* the blkg this per-policy data belongs to */
127 struct blkio_group
*blkg
;
130 struct blkio_group_conf conf
;
132 struct blkio_group_stats stats
;
133 /* Per cpu stats pointer */
134 struct blkio_group_stats_cpu __percpu
*stats_cpu
;
136 /* pol->pdata_size bytes of private data used by policy impl */
137 char pdata
[] __aligned(__alignof__(unsigned long long));
141 /* Pointer to the associated request_queue */
142 struct request_queue
*q
;
143 struct list_head q_node
;
144 struct hlist_node blkcg_node
;
145 struct blkio_cgroup
*blkcg
;
146 /* Store cgroup path */
148 /* reference count */
151 struct blkg_policy_data
*pd
[BLKIO_NR_POLICIES
];
153 /* List of blkg waiting for per cpu stats memory to be allocated */
154 struct list_head alloc_node
;
155 struct rcu_head rcu_head
;
158 typedef void (blkio_init_group_fn
)(struct blkio_group
*blkg
);
159 typedef void (blkio_update_group_weight_fn
)(struct request_queue
*q
,
160 struct blkio_group
*blkg
, unsigned int weight
);
161 typedef void (blkio_update_group_read_bps_fn
)(struct request_queue
*q
,
162 struct blkio_group
*blkg
, u64 read_bps
);
163 typedef void (blkio_update_group_write_bps_fn
)(struct request_queue
*q
,
164 struct blkio_group
*blkg
, u64 write_bps
);
165 typedef void (blkio_update_group_read_iops_fn
)(struct request_queue
*q
,
166 struct blkio_group
*blkg
, unsigned int read_iops
);
167 typedef void (blkio_update_group_write_iops_fn
)(struct request_queue
*q
,
168 struct blkio_group
*blkg
, unsigned int write_iops
);
170 struct blkio_policy_ops
{
171 blkio_init_group_fn
*blkio_init_group_fn
;
172 blkio_update_group_weight_fn
*blkio_update_group_weight_fn
;
173 blkio_update_group_read_bps_fn
*blkio_update_group_read_bps_fn
;
174 blkio_update_group_write_bps_fn
*blkio_update_group_write_bps_fn
;
175 blkio_update_group_read_iops_fn
*blkio_update_group_read_iops_fn
;
176 blkio_update_group_write_iops_fn
*blkio_update_group_write_iops_fn
;
179 struct blkio_policy_type
{
180 struct list_head list
;
181 struct blkio_policy_ops ops
;
182 enum blkio_policy_id plid
;
183 size_t pdata_size
; /* policy specific private data size */
184 struct cftype
*cftypes
; /* cgroup files for the policy */
187 extern int blkcg_init_queue(struct request_queue
*q
);
188 extern void blkcg_drain_queue(struct request_queue
*q
);
189 extern void blkcg_exit_queue(struct request_queue
*q
);
191 /* Blkio controller policy registration */
192 extern void blkio_policy_register(struct blkio_policy_type
*);
193 extern void blkio_policy_unregister(struct blkio_policy_type
*);
194 extern void blkg_destroy_all(struct request_queue
*q
, bool destroy_root
);
195 extern void update_root_blkg_pd(struct request_queue
*q
,
196 enum blkio_policy_id plid
);
198 void blkcg_print_blkgs(struct seq_file
*sf
, struct blkio_cgroup
*blkcg
,
199 u64 (*prfill
)(struct seq_file
*, struct blkg_policy_data
*, int),
200 int pol
, int data
, bool show_total
);
201 u64
__blkg_prfill_u64(struct seq_file
*sf
, struct blkg_policy_data
*pd
, u64 v
);
202 u64
__blkg_prfill_rwstat(struct seq_file
*sf
, struct blkg_policy_data
*pd
,
203 const struct blkg_rwstat
*rwstat
);
204 int blkcg_print_stat(struct cgroup
*cgrp
, struct cftype
*cft
,
205 struct seq_file
*sf
);
206 int blkcg_print_rwstat(struct cgroup
*cgrp
, struct cftype
*cft
,
207 struct seq_file
*sf
);
208 int blkcg_print_cpu_stat(struct cgroup
*cgrp
, struct cftype
*cft
,
209 struct seq_file
*sf
);
210 int blkcg_print_cpu_rwstat(struct cgroup
*cgrp
, struct cftype
*cft
,
211 struct seq_file
*sf
);
213 struct blkg_conf_ctx
{
214 struct gendisk
*disk
;
215 struct blkio_group
*blkg
;
219 int blkg_conf_prep(struct blkio_cgroup
*blkcg
, const char *input
,
220 struct blkg_conf_ctx
*ctx
);
221 void blkg_conf_finish(struct blkg_conf_ctx
*ctx
);
225 * blkg_to_pdata - get policy private data
226 * @blkg: blkg of interest
227 * @pol: policy of interest
229 * Return pointer to private data associated with the @blkg-@pol pair.
231 static inline void *blkg_to_pdata(struct blkio_group
*blkg
,
232 struct blkio_policy_type
*pol
)
234 return blkg
? blkg
->pd
[pol
->plid
]->pdata
: NULL
;
238 * pdata_to_blkg - get blkg associated with policy private data
239 * @pdata: policy private data of interest
241 * @pdata is policy private data. Determine the blkg it's associated with.
243 static inline struct blkio_group
*pdata_to_blkg(void *pdata
)
246 struct blkg_policy_data
*pd
=
247 container_of(pdata
, struct blkg_policy_data
, pdata
);
253 static inline char *blkg_path(struct blkio_group
*blkg
)
259 * blkg_get - get a blkg reference
262 * The caller should be holding queue_lock and an existing reference.
264 static inline void blkg_get(struct blkio_group
*blkg
)
266 lockdep_assert_held(blkg
->q
->queue_lock
);
267 WARN_ON_ONCE(!blkg
->refcnt
);
271 void __blkg_release(struct blkio_group
*blkg
);
274 * blkg_put - put a blkg reference
277 * The caller should be holding queue_lock.
279 static inline void blkg_put(struct blkio_group
*blkg
)
281 lockdep_assert_held(blkg
->q
->queue_lock
);
282 WARN_ON_ONCE(blkg
->refcnt
<= 0);
284 __blkg_release(blkg
);
288 * blkg_stat_add - add a value to a blkg_stat
289 * @stat: target blkg_stat
292 * Add @val to @stat. The caller is responsible for synchronizing calls to
295 static inline void blkg_stat_add(struct blkg_stat
*stat
, uint64_t val
)
297 u64_stats_update_begin(&stat
->syncp
);
299 u64_stats_update_end(&stat
->syncp
);
303 * blkg_stat_read - read the current value of a blkg_stat
304 * @stat: blkg_stat to read
306 * Read the current value of @stat. This function can be called without
307 * synchronization and takes care of u64 atomicity.
309 static inline uint64_t blkg_stat_read(struct blkg_stat
*stat
)
315 start
= u64_stats_fetch_begin(&stat
->syncp
);
317 } while (u64_stats_fetch_retry(&stat
->syncp
, start
));
323 * blkg_stat_reset - reset a blkg_stat
324 * @stat: blkg_stat to reset
326 static inline void blkg_stat_reset(struct blkg_stat
*stat
)
332 * blkg_rwstat_add - add a value to a blkg_rwstat
333 * @rwstat: target blkg_rwstat
334 * @rw: mask of REQ_{WRITE|SYNC}
337 * Add @val to @rwstat. The counters are chosen according to @rw. The
338 * caller is responsible for synchronizing calls to this function.
340 static inline void blkg_rwstat_add(struct blkg_rwstat
*rwstat
,
341 int rw
, uint64_t val
)
343 u64_stats_update_begin(&rwstat
->syncp
);
346 rwstat
->cnt
[BLKG_RWSTAT_WRITE
] += val
;
348 rwstat
->cnt
[BLKG_RWSTAT_READ
] += val
;
350 rwstat
->cnt
[BLKG_RWSTAT_SYNC
] += val
;
352 rwstat
->cnt
[BLKG_RWSTAT_ASYNC
] += val
;
354 u64_stats_update_end(&rwstat
->syncp
);
358 * blkg_rwstat_read - read the current values of a blkg_rwstat
359 * @rwstat: blkg_rwstat to read
361 * Read the current snapshot of @rwstat and return it as the return value.
362 * This function can be called without synchronization and takes care of
365 static struct blkg_rwstat
blkg_rwstat_read(struct blkg_rwstat
*rwstat
)
368 struct blkg_rwstat tmp
;
371 start
= u64_stats_fetch_begin(&rwstat
->syncp
);
373 } while (u64_stats_fetch_retry(&rwstat
->syncp
, start
));
379 * blkg_rwstat_sum - read the total count of a blkg_rwstat
380 * @rwstat: blkg_rwstat to read
382 * Return the total count of @rwstat regardless of the IO direction. This
383 * function can be called without synchronization and takes care of u64
386 static inline uint64_t blkg_rwstat_sum(struct blkg_rwstat
*rwstat
)
388 struct blkg_rwstat tmp
= blkg_rwstat_read(rwstat
);
390 return tmp
.cnt
[BLKG_RWSTAT_READ
] + tmp
.cnt
[BLKG_RWSTAT_WRITE
];
394 * blkg_rwstat_reset - reset a blkg_rwstat
395 * @rwstat: blkg_rwstat to reset
397 static inline void blkg_rwstat_reset(struct blkg_rwstat
*rwstat
)
399 memset(rwstat
->cnt
, 0, sizeof(rwstat
->cnt
));
407 struct blkio_policy_type
{
/* Stub variant of blkcg_init_queue(): does no work and reports success (0). */
static inline int blkcg_init_queue(struct request_queue *q)
{
	return 0;
}
/* Stub variant of blkcg_drain_queue(): intentionally empty. */
static inline void blkcg_drain_queue(struct request_queue *q)
{
}
/* Stub variant of blkcg_exit_queue(): intentionally empty. */
static inline void blkcg_exit_queue(struct request_queue *q)
{
}
/* Stub variant of blkio_policy_register(): @blkiop is ignored. */
static inline void blkio_policy_register(struct blkio_policy_type *blkiop)
{
}
/* Stub variant of blkio_policy_unregister(): @blkiop is ignored. */
static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
}
/*
 * Stub variant of blkg_destroy_all(): both arguments are ignored and nothing
 * is done.  The second parameter is spelled @destroy_root, matching the
 * extern declaration of blkg_destroy_all() in this header (it was previously
 * misspelled "destory_root").
 */
static inline void blkg_destroy_all(struct request_queue *q,
				    bool destroy_root)
{
}
417 static inline void update_root_blkg_pd(struct request_queue
*q
,
418 enum blkio_policy_id plid
) { }
/*
 * Stub variant of blkg_to_pdata(): no private data is ever available, so
 * NULL is returned regardless of @blkg and @pol.
 */
static inline void *blkg_to_pdata(struct blkio_group *blkg,
				  struct blkio_policy_type *pol)
{
	return NULL;
}
/*
 * pdata_to_blkg - stub lookup of the blkg associated with @pdata
 * @pdata: policy private data of interest
 *
 * Always returns NULL.  The stub takes only @pdata so that its signature
 * matches the full pdata_to_blkg() defined earlier in this header; the
 * previous version took an additional policy argument, which made the two
 * variants incompatible at call sites.
 */
static inline struct blkio_group *pdata_to_blkg(void *pdata)
{
	return NULL;
}
/* Stub variant of blkg_path(): there is no path to report, returns NULL. */
static inline char *blkg_path(struct blkio_group *blkg)
{
	return NULL;
}
/* Stub variant of blkg_get(): no reference is taken, @blkg is ignored. */
static inline void blkg_get(struct blkio_group *blkg)
{
}
/* Stub variant of blkg_put(): no reference is dropped, @blkg is ignored. */
static inline void blkg_put(struct blkio_group *blkg)
{
}
430 #define BLKIO_WEIGHT_MIN 10
431 #define BLKIO_WEIGHT_MAX 1000
432 #define BLKIO_WEIGHT_DEFAULT 500
434 #ifdef CONFIG_DEBUG_BLK_CGROUP
435 void blkiocg_update_avg_queue_size_stats(struct blkio_group
*blkg
,
436 struct blkio_policy_type
*pol
);
437 void blkiocg_update_dequeue_stats(struct blkio_group
*blkg
,
438 struct blkio_policy_type
*pol
,
439 unsigned long dequeue
);
440 void blkiocg_update_set_idle_time_stats(struct blkio_group
*blkg
,
441 struct blkio_policy_type
*pol
);
442 void blkiocg_update_idle_time_stats(struct blkio_group
*blkg
,
443 struct blkio_policy_type
*pol
);
444 void blkiocg_set_start_empty_time(struct blkio_group
*blkg
,
445 struct blkio_policy_type
*pol
);
447 #define BLKG_FLAG_FNS(name) \
448 static inline void blkio_mark_blkg_##name( \
449 struct blkio_group_stats *stats) \
451 stats->flags |= (1 << BLKG_##name); \
453 static inline void blkio_clear_blkg_##name( \
454 struct blkio_group_stats *stats) \
456 stats->flags &= ~(1 << BLKG_##name); \
458 static inline int blkio_blkg_##name(struct blkio_group_stats *stats) \
460 return (stats->flags & (1 << BLKG_##name)) != 0; \
463 BLKG_FLAG_FNS(waiting)
464 BLKG_FLAG_FNS(idling
)
/*
 * Empty inline variant of blkiocg_update_avg_queue_size_stats(); presumably
 * the one used when CONFIG_DEBUG_BLK_CGROUP is off (the guarding #else is
 * not visible here).
 */
static inline void
blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol)
{
}
/*
 * Empty inline variant of blkiocg_update_dequeue_stats(); all three
 * arguments are ignored.
 */
static inline void
blkiocg_update_dequeue_stats(struct blkio_group *blkg,
			     struct blkio_policy_type *pol,
			     unsigned long dequeue)
{
}
/*
 * Empty inline variant of blkiocg_update_set_idle_time_stats(); @blkg and
 * @pol are ignored.
 */
static inline void
blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol)
{
}
/*
 * Empty inline variant of blkiocg_update_idle_time_stats(); @blkg and @pol
 * are ignored.
 */
static inline void
blkiocg_update_idle_time_stats(struct blkio_group *blkg,
			       struct blkio_policy_type *pol)
{
}
/*
 * Empty inline variant of blkiocg_set_start_empty_time(); @blkg and @pol
 * are ignored.
 */
static inline void
blkiocg_set_start_empty_time(struct blkio_group *blkg,
			     struct blkio_policy_type *pol)
{
}
480 #ifdef CONFIG_BLK_CGROUP
481 extern struct blkio_cgroup blkio_root_cgroup
;
482 extern struct blkio_cgroup
*cgroup_to_blkio_cgroup(struct cgroup
*cgroup
);
483 extern struct blkio_cgroup
*bio_blkio_cgroup(struct bio
*bio
);
484 extern struct blkio_group
*blkg_lookup(struct blkio_cgroup
*blkcg
,
485 struct request_queue
*q
);
486 struct blkio_group
*blkg_lookup_create(struct blkio_cgroup
*blkcg
,
487 struct request_queue
*q
,
489 void blkiocg_update_timeslice_used(struct blkio_group
*blkg
,
490 struct blkio_policy_type
*pol
,
492 unsigned long unaccounted_time
);
493 void blkiocg_update_dispatch_stats(struct blkio_group
*blkg
,
494 struct blkio_policy_type
*pol
,
495 uint64_t bytes
, bool direction
, bool sync
);
496 void blkiocg_update_completion_stats(struct blkio_group
*blkg
,
497 struct blkio_policy_type
*pol
,
499 uint64_t io_start_time
, bool direction
,
501 void blkiocg_update_io_merged_stats(struct blkio_group
*blkg
,
502 struct blkio_policy_type
*pol
,
503 bool direction
, bool sync
);
504 void blkiocg_update_io_add_stats(struct blkio_group
*blkg
,
505 struct blkio_policy_type
*pol
,
506 struct blkio_group
*curr_blkg
, bool direction
,
508 void blkiocg_update_io_remove_stats(struct blkio_group
*blkg
,
509 struct blkio_policy_type
*pol
,
510 bool direction
, bool sync
);
/* Stub variant of cgroup_to_blkio_cgroup(): always yields NULL. */
static inline struct blkio_cgroup *
cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return NULL;
}
/* Stub variant of bio_blkio_cgroup(): always yields NULL. */
static inline struct blkio_cgroup *
bio_blkio_cgroup(struct bio *bio)
{
	return NULL;
}
/*
 * blkg_lookup - stub lookup of the blkg for a blkcg/queue pair
 * @blkcg: blkcg of interest (ignored)
 * @q: request_queue of interest (ignored)
 *
 * Always returns NULL.  The second parameter is typed as
 * struct request_queue * so the stub mirrors the extern declaration of
 * blkg_lookup() in this header; it used to be an untyped "void *key", which
 * let mismatched pointers through without a diagnostic.
 */
static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
					      struct request_queue *q)
{
	return NULL;
}
/*
 * Empty inline variant of blkiocg_update_timeslice_used(); every argument
 * is ignored.
 */
static inline void
blkiocg_update_timeslice_used(struct blkio_group *blkg,
			      struct blkio_policy_type *pol,
			      unsigned long time,
			      unsigned long unaccounted_time)
{
}
/*
 * Empty inline variant of blkiocg_update_dispatch_stats(); every argument
 * is ignored.
 */
static inline void
blkiocg_update_dispatch_stats(struct blkio_group *blkg,
			      struct blkio_policy_type *pol,
			      uint64_t bytes,
			      bool direction,
			      bool sync)
{
}
/*
 * Empty inline variant of blkiocg_update_completion_stats(); every argument
 * is ignored.
 */
static inline void
blkiocg_update_completion_stats(struct blkio_group *blkg,
				struct blkio_policy_type *pol,
				uint64_t start_time,
				uint64_t io_start_time,
				bool direction,
				bool sync)
{
}
529 static inline void blkiocg_update_io_merged_stats(struct blkio_group
*blkg
,
530 struct blkio_policy_type
*pol
, bool direction
,
532 static inline void blkiocg_update_io_add_stats(struct blkio_group
*blkg
,
533 struct blkio_policy_type
*pol
,
534 struct blkio_group
*curr_blkg
, bool direction
,
536 static inline void blkiocg_update_io_remove_stats(struct blkio_group
*blkg
,
537 struct blkio_policy_type
*pol
, bool direction
,
540 #endif /* _BLK_CGROUP_H */