/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _BLK_CGROUP_H
#define _BLK_CGROUP_H
/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */

#include <linux/cgroup.h>
#include <linux/percpu_counter.h>
#include <linux/seq_file.h>
#include <linux/radix-tree.h>
#include <linux/blkdev.h>
#include <linux/atomic.h>
#include <linux/kthread.h>
#include <linux/fs.h>

/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)

/* Max limits for throttle policy */
#define THROTL_IOPS_MAX		UINT_MAX

#ifdef CONFIG_BLK_CGROUP

enum blkg_rwstat_type {
	BLKG_RWSTAT_READ,
	BLKG_RWSTAT_WRITE,
	BLKG_RWSTAT_SYNC,
	BLKG_RWSTAT_ASYNC,
	BLKG_RWSTAT_DISCARD,

	BLKG_RWSTAT_NR,
	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
};

struct blkcg_gq;

struct blkcg {
	struct cgroup_subsys_state	css;
	spinlock_t			lock;

	struct radix_tree_root		blkg_tree;
	struct blkcg_gq	__rcu		*blkg_hint;
	struct hlist_head		blkg_list;

	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];

	struct list_head		all_blkcgs_node;
#ifdef CONFIG_CGROUP_WRITEBACK
	struct list_head		cgwb_list;
	refcount_t			cgwb_refcnt;
#endif
};

/*
 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
 * recursive.  Used to carry stats of dead children.
 */
struct blkg_rwstat {
	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
};

struct blkg_rwstat_sample {
	u64				cnt[BLKG_RWSTAT_NR];
};

/*
 * A blkcg_gq (blkg) is an association between a block cgroup (blkcg) and a
 * request_queue (q).  This is used by blkcg policies which need to track
 * information per blkcg - q pair.
 *
 * There can be multiple active blkcg policies and each blkg:policy pair is
 * represented by a blkg_policy_data which is allocated and freed by each
 * policy's pd_alloc/free_fn() methods.  A policy can allocate private data
 * area by allocating larger data structure which embeds blkg_policy_data
 * at the beginning.
 */
struct blkg_policy_data {
	/* the blkg and policy id this per-policy data belongs to */
	struct blkcg_gq			*blkg;
	int				plid;
};

/*
 * Policies that need to keep per-blkcg data which is independent from any
 * request_queue associated to it should implement cpd_alloc/free_fn()
 * methods.  A policy can allocate private data area by allocating larger
 * data structure which embeds blkcg_policy_data at the beginning.
 * cpd_init() is invoked to let each policy handle per-blkcg data.
 */
struct blkcg_policy_data {
	/* the blkcg and policy id this per-policy data belongs to */
	struct blkcg			*blkcg;
	int				plid;
};

/* association between a blk cgroup and a request queue */
struct blkcg_gq {
	/* Pointer to the associated request_queue */
	struct request_queue		*q;
	struct list_head		q_node;
	struct hlist_node		blkcg_node;
	struct blkcg			*blkcg;

	/*
	 * Each blkg gets congested separately and the congestion state is
	 * propagated to the matching bdi_writeback_congested.
	 */
	struct bdi_writeback_congested	*wb_congested;

	/* all non-root blkcg_gq's are guaranteed to have access to parent */
	struct blkcg_gq			*parent;

	/* reference count */
	struct percpu_ref		refcnt;

	/* is this blkg online? protected by both blkcg and q locks */
	bool				online;

	struct blkg_rwstat		stat_bytes;
	struct blkg_rwstat		stat_ios;

	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];

	spinlock_t			async_bio_lock;
	struct bio_list			async_bios;
	struct work_struct		async_bio_work;

	atomic_t			use_delay;
	atomic64_t			delay_nsec;
	atomic64_t			delay_start;
	u64				last_delay;
	int				last_use;

	struct rcu_head			rcu_head;
};

typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp,
				struct request_queue *q, struct blkcg *blkcg);
typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf,
				      size_t size);

struct blkcg_policy {
	int				plid;
	/* cgroup files for the policy */
	struct cftype			*dfl_cftypes;
	struct cftype			*legacy_cftypes;

	/* operations */
	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
	blkcg_pol_init_cpd_fn		*cpd_init_fn;
	blkcg_pol_free_cpd_fn		*cpd_free_fn;
	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;

	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
	blkcg_pol_init_pd_fn		*pd_init_fn;
	blkcg_pol_online_pd_fn		*pd_online_fn;
	blkcg_pol_offline_pd_fn		*pd_offline_fn;
	blkcg_pol_free_pd_fn		*pd_free_fn;
	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
	blkcg_pol_stat_pd_fn		*pd_stat_fn;
};

extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
extern bool blkcg_debug_stats;

struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
				      struct request_queue *q, bool update_hint);
struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
				      struct request_queue *q);
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
				    struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);

/* Blkio controller policy registration */
int blkcg_policy_register(struct blkcg_policy *pol);
void blkcg_policy_unregister(struct blkcg_policy *pol);
int blkcg_activate_policy(struct request_queue *q,
			  const struct blkcg_policy *pol);
void blkcg_deactivate_policy(struct request_queue *q,
			     const struct blkcg_policy *pol);

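/*
 * Illustrative sketch of a minimal policy, not a declaration this header
 * provides; all "example_" names are hypothetical.  A policy embeds
 * blkg_policy_data at the start of its per-blkg structure, supplies
 * pd_alloc/free_fn, then registers and activates itself:
 *
 *	struct example_pd {
 *		struct blkg_policy_data pd;	(must come first)
 *		u64 nr_ios;
 *	};
 *
 *	static struct blkg_policy_data *example_pd_alloc(gfp_t gfp,
 *			struct request_queue *q, struct blkcg *blkcg)
 *	{
 *		struct example_pd *epd = kzalloc_node(sizeof(*epd), gfp,
 *						      q->node);
 *		return epd ? &epd->pd : NULL;
 *	}
 *
 *	static void example_pd_free(struct blkg_policy_data *pd)
 *	{
 *		kfree(container_of(pd, struct example_pd, pd));
 *	}
 *
 *	static struct blkcg_policy example_policy = {
 *		.pd_alloc_fn	= example_pd_alloc,
 *		.pd_free_fn	= example_pd_free,
 *	};
 *
 * blkcg_policy_register(&example_policy) picks a free slot and sets ->plid;
 * blkcg_activate_policy(q, &example_policy) then allocates a pd for every
 * blkg already hanging off @q.
 */
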
static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat,
					   unsigned int idx)
{
	return atomic64_read(&rwstat->aux_cnt[idx]) +
		percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]);
}

const char *blkg_dev_name(struct blkcg_gq *blkg);
void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
		       u64 (*prfill)(struct seq_file *,
				     struct blkg_policy_data *, int),
		       const struct blkcg_policy *pol, int data,
		       bool show_total);
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v);
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat_sample *rwstat);
u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
		       int off);
void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
			       int off, struct blkg_rwstat_sample *sum);

struct blkg_conf_ctx {
	struct gendisk			*disk;
	struct blkcg_gq			*blkg;
	char				*body;
};

struct gendisk *blkcg_conf_get_disk(char **inputp);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
		   char *input, struct blkg_conf_ctx *ctx);
void blkg_conf_finish(struct blkg_conf_ctx *ctx);

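/*
 * Illustrative use of the conf helpers (a hedged sketch; "example_policy"
 * and example_parse_limit() are hypothetical).  blkg_conf_prep() parses the
 * "MAJ:MIN" prefix of @input, looks up or creates the matching blkg and
 * returns with it pinned; blkg_conf_finish() undoes all of that:
 *
 *	struct blkg_conf_ctx ctx;
 *	int ret;
 *
 *	ret = blkg_conf_prep(blkcg, &example_policy, buf, &ctx);
 *	if (ret)
 *		return ret;
 *	ret = example_parse_limit(ctx.body,
 *				  blkg_to_pd(ctx.blkg, &example_policy));
 *	blkg_conf_finish(&ctx);
 *	return ret;
 */
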
/**
 * blkcg_css - find the current css
 *
 * Find the css associated with either the kthread or the current task.
 * This may return a dying css, so it is up to the caller to use tryget logic
 * to confirm it is alive and well.
 */
static inline struct cgroup_subsys_state *blkcg_css(void)
{
	struct cgroup_subsys_state *css;

	css = kthread_blkcg();
	if (css)
		return css;
	return task_css(current, io_cgrp_id);
}

static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct blkcg, css) : NULL;
}

/**
 * __bio_blkcg - internal, inconsistent version to get blkcg
 *
 * DO NOT USE.
 * This function is inconsistent and consequently is dangerous to use.  The
 * first part of the function returns a blkcg where a reference is owned by
 * the bio.  This means it does not need to be rcu protected as it cannot go
 * away with the bio owning a reference to it.  However, the latter
 * potentially gets it from task_css().  This can race against task migration
 * and the cgroup dying.  It is also semantically different as it must be
 * called rcu protected and is susceptible to failure when trying to get a
 * reference to it.  Therefore, it is not ok to assume that *_get() will
 * always succeed on the blkcg returned here.
 */
static inline struct blkcg *__bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return css_to_blkcg(blkcg_css());
}

/**
 * bio_blkcg - grab the blkcg associated with a bio
 * @bio: target bio
 *
 * This returns the blkcg associated with a bio, %NULL if not associated.
 * Callers are expected to either handle %NULL or know association has been
 * done prior to calling this.
 */
static inline struct blkcg *bio_blkcg(struct bio *bio)
{
	if (bio && bio->bi_blkg)
		return bio->bi_blkg->blkcg;
	return NULL;
}

static inline bool blk_cgroup_congested(void)
{
	struct cgroup_subsys_state *css;
	bool ret = false;

	rcu_read_lock();
	css = kthread_blkcg();
	if (!css)
		css = task_css(current, io_cgrp_id);
	while (css) {
		if (atomic_read(&css->cgroup->congestion_count)) {
			ret = true;
			break;
		}
		css = css->parent;
	}
	rcu_read_unlock();
	return ret;
}

/**
 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
 * @return: true if this bio needs to be submitted with the root blkg context.
 *
 * In order to avoid priority inversions we sometimes need to issue a bio as
 * if it were attached to the root blkg, and then backcharge to the actual
 * owning blkg.  The idea is we do bio_blkcg() to look up the actual context
 * for the bio and attach the appropriate blkg to the bio.  Then we call this
 * helper and if it is true run with the root blkg for that queue and then do
 * any backcharging to the originating cgroup once the io is complete.
 */
static inline bool bio_issue_as_root_blkg(struct bio *bio)
{
	return (bio->bi_opf & (REQ_META | REQ_SWAP)) != 0;
}

/**
 * blkcg_parent - get the parent of a blkcg
 * @blkcg: blkcg of interest
 *
 * Return the parent blkcg of @blkcg.  Can be called anytime.
 */
static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
{
	return css_to_blkcg(blkcg->css.parent);
}

/**
 * __blkg_lookup - internal version of blkg_lookup()
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 * @update_hint: whether to update lookup hint with the result or not
 *
 * This is the internal version and shouldn't be used by policy
 * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
 * @q's bypass state.  If @update_hint is %true, the caller should be
 * holding @q->queue_lock and the lookup hint is updated on success.
 */
static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					     struct request_queue *q,
					     bool update_hint)
{
	struct blkcg_gq *blkg;

	if (blkcg == &blkcg_root)
		return q->root_blkg;

	blkg = rcu_dereference(blkcg->blkg_hint);
	if (blkg && blkg->q == q)
		return blkg;

	return blkg_lookup_slowpath(blkcg, q, update_hint);
}

/**
 * blkg_lookup - lookup blkg for the specified blkcg - q pair
 * @blkcg: blkcg of interest
 * @q: request_queue of interest
 *
 * Lookup blkg for the @blkcg - @q pair.  This function should be called
 * under RCU read lock.
 */
static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
					   struct request_queue *q)
{
	WARN_ON_ONCE(!rcu_read_lock_held());
	return __blkg_lookup(blkcg, q, false);
}

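/*
 * Typical lookup pattern, as an illustrative sketch: the returned blkg is
 * only guaranteed to stay around for the RCU read section unless the caller
 * takes its own reference, e.g. with blkg_tryget() (defined below):
 *
 *	struct blkcg_gq *blkg;
 *
 *	rcu_read_lock();
 *	blkg = blkg_lookup(blkcg, q);
 *	if (blkg && blkg_tryget(blkg)) {
 *		... use blkg ...
 *		blkg_put(blkg);
 *	}
 *	rcu_read_unlock();
 */
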
/**
 * blk_queue_root_blkg - return blkg for the (blkcg_root, @q) pair
 * @q: request_queue of interest
 *
 * Lookup blkg for @q at the root level.  See also blkg_lookup().
 */
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{
	return q->root_blkg;
}

/**
 * blkg_to_pd - get policy private data
 * @blkg: blkg of interest
 * @pol: policy of interest
 *
 * Return pointer to private data associated with the @blkg-@pol pair.
 */
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol)
{
	return blkg ? blkg->pd[pol->plid] : NULL;
}

static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
						     struct blkcg_policy *pol)
{
	return blkcg ? blkcg->cpd[pol->plid] : NULL;
}

/**
 * pd_to_blkg - get blkg associated with policy private data
 * @pd: policy private data of interest
 *
 * @pd is policy private data.  Determine the blkg it's associated with.
 */
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
{
	return pd ? pd->blkg : NULL;
}

static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
{
	return cpd ? cpd->blkcg : NULL;
}

extern void blkcg_destroy_blkgs(struct blkcg *blkcg);

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 */
static inline void blkcg_cgwb_get(struct blkcg *blkcg)
{
	refcount_inc(&blkcg->cgwb_refcnt);
}

/**
 * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
 * @blkcg: blkcg of interest
 *
 * This is used to track the number of active wb's related to a blkcg.
 * When this count goes to zero, all active wb's have finished so the
 * blkcg can continue destruction by calling blkcg_destroy_blkgs().
 * This work may occur in cgwb_release_workfn() on the cgwb_release
 * workqueue.
 */
static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
		blkcg_destroy_blkgs(blkcg);
}

#else

static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }

static inline void blkcg_cgwb_put(struct blkcg *blkcg)
{
	/* wb isn't being accounted, so trigger destruction right away */
	blkcg_destroy_blkgs(blkcg);
}

#endif

/**
 * blkg_path - format cgroup path of blkg
 * @blkg: blkg of interest
 * @buf: target buffer
 * @buflen: target buffer length
 *
 * Format the path of the cgroup of @blkg into @buf.
 */
static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
{
	return cgroup_path(blkg->blkcg->css.cgroup, buf, buflen);
}

/**
 * blkg_get - get a blkg reference
 * @blkg: blkg to get
 *
 * The caller should be holding an existing reference.
 */
static inline void blkg_get(struct blkcg_gq *blkg)
{
	percpu_ref_get(&blkg->refcnt);
}

/**
 * blkg_tryget - try and get a blkg reference
 * @blkg: blkg to get
 *
 * This is for use when doing an RCU lookup of the blkg.  We may be in the
 * midst of freeing this blkg, so we can only use it if the refcnt is not
 * zero.
 */
static inline bool blkg_tryget(struct blkcg_gq *blkg)
{
	return blkg && percpu_ref_tryget(&blkg->refcnt);
}

/**
 * blkg_tryget_closest - try and get a blkg ref on the closest blkg
 * @blkg: blkg to get
 *
 * This needs to be called rcu protected.  As the failure mode here is to
 * walk up the blkg tree, this ensures that the blkg->parent pointers are
 * always valid.  This returns the blkg that it ended up taking a reference
 * on or %NULL if no reference was taken.
 */
static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
{
	struct blkcg_gq *ret_blkg = NULL;

	WARN_ON_ONCE(!rcu_read_lock_held());

	while (blkg) {
		if (blkg_tryget(blkg)) {
			ret_blkg = blkg;
			break;
		}
		blkg = blkg->parent;
	}

	return ret_blkg;
}

/**
 * blkg_put - put a blkg reference
 * @blkg: blkg to put
 */
static inline void blkg_put(struct blkcg_gq *blkg)
{
	percpu_ref_put(&blkg->refcnt);
}

/**
 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
 * read locked.  If called under either blkcg or queue lock, the iteration
 * is guaranteed to include all and only online blkgs.  The caller may
 * update @pos_css by calling css_rightmost_descendant() to skip subtree.
 * @p_blkg is included in the iteration and the first node to be visited.
 */
#define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

/**
 * blkg_for_each_descendant_post - post-order walk of a blkg's descendants
 * @d_blkg: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @p_blkg: target blkg to walk descendants of
 *
 * Similar to blkg_for_each_descendant_pre() but performs post-order
 * traversal instead.  Synchronization rules are the same.  @p_blkg is
 * included in the iteration and the last node to be visited.
 */
#define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg)		\
	css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css)	\
		if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css),	\
					      (p_blkg)->q, false)))

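/*
 * Illustrative walk (hedged sketch; example_reset_pd() and example_policy
 * are hypothetical).  Under RCU plus the blkcg or queue lock, this visits
 * @p_blkg and every online descendant blkg on the same request_queue:
 *
 *	struct blkcg_gq *d_blkg;
 *	struct cgroup_subsys_state *pos_css;
 *
 *	rcu_read_lock();
 *	blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg)
 *		example_reset_pd(blkg_to_pd(d_blkg, &example_policy));
 *	rcu_read_unlock();
 */
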
static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
{
	int i, ret;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
		if (ret) {
			while (--i >= 0)
				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
			return ret;
		}
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
	return 0;
}

static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_add - add a value to a blkg_rwstat
 * @rwstat: target blkg_rwstat
 * @op: REQ_OP and flags
 * @val: value to add
 *
 * Add @val to @rwstat.  The counters are chosen according to @op.  The
 * caller is responsible for synchronizing calls to this function.
 */
static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
				   unsigned int op, uint64_t val)
{
	struct percpu_counter *cnt;

	if (op_is_discard(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_DISCARD];
	else if (op_is_write(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);

	if (op_is_sync(op))
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
	else
		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];

	percpu_counter_add_batch(cnt, val, BLKG_STAT_CPU_BATCH);
}

/**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 * @result: where to store the current snapshot
 *
 * Read the current snapshot of @rwstat and return it in @result.
 */
static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat,
				    struct blkg_rwstat_sample *result)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		result->cnt[i] =
			percpu_counter_sum_positive(&rwstat->cpu_cnt[i]);
}

/**
 * blkg_rwstat_total - read the total count of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
 * Return the total count of @rwstat regardless of the IO direction.  This
 * function can be called without synchronization and takes care of u64
 * atomicity.
 */
static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
{
	struct blkg_rwstat_sample tmp = { };

	blkg_rwstat_read(rwstat, &tmp);
	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
}

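/*
 * Putting the rwstat helpers together, as an illustrative sketch (error
 * handling trimmed):
 *
 *	struct blkg_rwstat st;
 *	u64 total;
 *
 *	if (blkg_rwstat_init(&st, GFP_KERNEL))
 *		return -ENOMEM;
 *	blkg_rwstat_add(&st, REQ_OP_WRITE | REQ_SYNC, 4096);
 *	total = blkg_rwstat_total(&st);		(READ + WRITE == 4096)
 *	blkg_rwstat_exit(&st);
 *
 * The add above bumps both the WRITE and the SYNC counter by 4096; only
 * READ and WRITE feed into blkg_rwstat_total().
 */
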
/**
 * blkg_rwstat_reset - reset a blkg_rwstat
 * @rwstat: blkg_rwstat to reset
 */
static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
{
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
		atomic64_set(&rwstat->aux_cnt[i], 0);
	}
}

/**
 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
 * Add @from's count including the aux one to @to's aux count.
 */
static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
				       struct blkg_rwstat *from)
{
	u64 sum[BLKG_RWSTAT_NR];
	int i;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		sum[i] = percpu_counter_sum_positive(&from->cpu_cnt[i]);

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		atomic64_add(sum[i] + atomic64_read(&from->aux_cnt[i]),
			     &to->aux_cnt[i]);
}

#ifdef CONFIG_BLK_DEV_THROTTLING
extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
			   struct bio *bio);
#else
static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
				  struct bio *bio) { return false; }
#endif

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
	if (bio->bi_opf & REQ_CGROUP_PUNT)
		return __blkcg_punt_bio_submit(bio);
	else
		return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
}

static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio)
{
	struct blkcg_gq *blkg;
	bool throtl = false;

	rcu_read_lock();

	if (!bio->bi_blkg) {
		char b[BDEVNAME_SIZE];

		WARN_ONCE(1,
			  "no blkg associated for bio on block-device: %s\n",
			  bio_devname(bio, b));
		bio_associate_blkg(bio);
	}

	blkg = bio->bi_blkg;

	throtl = blk_throtl_bio(q, blkg, bio);

	if (!throtl) {
		/*
		 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
		 * is a split bio and we would have already accounted for the
		 * size of the bio.
		 */
		if (!bio_flagged(bio, BIO_QUEUE_ENTERED))
			blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf,
					bio->bi_iter.bi_size);
		blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	}

	blkcg_bio_issue_init(bio);

	rcu_read_unlock();
	return !throtl;
}

static inline void blkcg_use_delay(struct blkcg_gq *blkg)
{
	if (atomic_add_return(1, &blkg->use_delay) == 1)
		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (old == 0)
		return 0;

	/*
	 * We do this song and dance because we can race with somebody else
	 * adding or removing delay.  If we just did an atomic_dec we'd end up
	 * negative and we'd already be in trouble.  We need to subtract 1 and
	 * then check to see if we were the last delay so we can drop the
	 * congestion count on the cgroup.
	 */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
		if (cur == old)
			break;
		old = cur;
	}

	if (old == 0)
		return 0;
	if (old == 1)
		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
	return 1;
}

static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
{
	int old = atomic_read(&blkg->use_delay);

	if (!old)
		return;
	/* We only want 1 person clearing the congestion count for this blkg. */
	while (old) {
		int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
		if (cur == old) {
			atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
			break;
		}
		old = cur;
	}
}

void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
void blkcg_maybe_throttle_current(void);
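
/*
 * How the delay machinery fits together, as a hedged sketch: a policy that
 * decides a blkg is over its limit marks it congested and charges delay;
 * the congestion marking is dropped once the pressure subsides:
 *
 *	blkcg_use_delay(blkg);
 *	blkcg_add_delay(blkg, ktime_get_ns(), delay_ns);
 *	...
 *	blkcg_unuse_delay(blkg);
 *
 * Throttled tasks then pay off the accumulated debt via
 * blkcg_maybe_throttle_current() on their way back to userspace.
 */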
#else	/* CONFIG_BLK_CGROUP */

struct blkcg {
};

struct blkg_policy_data {
};

struct blkcg_policy_data {
};

struct blkcg_gq {
};

struct blkcg_policy {
};

#define blkcg_root_css	((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))

static inline void blkcg_maybe_throttle_current(void) { }
static inline bool blk_cgroup_congested(void) { return false; }

#ifdef CONFIG_BLOCK

static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }

static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
static inline int blkcg_activate_policy(struct request_queue *q,
					const struct blkcg_policy *pol) { return 0; }
static inline void blkcg_deactivate_policy(struct request_queue *q,
					   const struct blkcg_policy *pol) { }

static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }

static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
						  struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline bool blkcg_bio_issue_check(struct request_queue *q,
					 struct bio *bio) { return true; }

#define blk_queue_for_each_rl(rl, q)	\
	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)

#endif	/* CONFIG_BLOCK */
#endif	/* CONFIG_BLK_CGROUP */
#endif	/* _BLK_CGROUP_H */