include/linux/cgroup.h (mirror_ubuntu-bionic-kernel.git, blame view at commit "cgroup: reorganize css_task_iter functions")

#ifndef _LINUX_CGROUP_H
#define _LINUX_CGROUP_H
/*
 * cgroup interface
 *
 * Copyright (C) 2003 BULL SA
 * Copyright (C) 2004-2006 Silicon Graphics, Inc.
 *
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/nodemask.h>
#include <linux/rculist.h>
#include <linux/cgroupstats.h>
#include <linux/rwsem.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/kernfs.h>
#include <linux/jump_label.h>

#include <linux/cgroup-defs.h>

#ifdef CONFIG_CGROUPS

/*
 * All weight knobs on the default hierarchy should use the following min,
 * default and max values. The default value is the logarithmic center of
 * MIN and MAX and allows 100x to be expressed in both directions.
 */
#define CGROUP_WEIGHT_MIN		1
#define CGROUP_WEIGHT_DFL		100
#define CGROUP_WEIGHT_MAX		10000

/* a css_task_iter should be treated as an opaque object */
struct css_task_iter {
        struct cgroup_subsys *ss;

        struct list_head *cset_pos;
        struct list_head *cset_head;

        struct list_head *task_pos;
        struct list_head *tasks_head;
        struct list_head *mg_tasks_head;
};

extern struct cgroup_root cgrp_dfl_root;
extern struct css_set init_css_set;

#define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

#define SUBSYS(_x)                                                      \
        extern struct static_key_true _x ## _cgrp_subsys_enabled_key;  \
        extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key;
#include <linux/cgroup_subsys.h>
#undef SUBSYS

/**
 * cgroup_subsys_enabled - fast test on whether a subsys is enabled
 * @ss: subsystem in question
 */
#define cgroup_subsys_enabled(ss) \
        static_branch_likely(&ss ## _enabled_key)

/**
 * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy
 * @ss: subsystem in question
 */
#define cgroup_subsys_on_dfl(ss) \
        static_branch_likely(&ss ## _on_dfl_key)

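/*
 * Example (illustrative only, not part of the original header): a hot path
 * can skip subsystem work when the subsystem is disabled, or pick the
 * default-hierarchy branch. memory_cgrp_subsys is the SUBSYS(memory)
 * instance; the accounting helpers are hypothetical.
 *
 *	if (!cgroup_subsys_enabled(memory_cgrp_subsys))
 *		return;
 *	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
 *		do_v2_accounting();
 *	else
 *		do_v1_accounting();
 */
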
bool css_has_online_children(struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
        struct cgroup_subsys *ss);
struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
        struct cgroup_subsys *ss);

bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor);
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *);
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from);

int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts);
int cgroup_rm_cftypes(struct cftype *cfts);

char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen);
int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry);
int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
        struct pid *pid, struct task_struct *tsk);

void cgroup_fork(struct task_struct *p);
extern int cgroup_can_fork(struct task_struct *p,
        void *ss_priv[CGROUP_CANFORK_COUNT]);
extern void cgroup_cancel_fork(struct task_struct *p,
        void *ss_priv[CGROUP_CANFORK_COUNT]);
extern void cgroup_post_fork(struct task_struct *p,
        void *old_ss_priv[CGROUP_CANFORK_COUNT]);
void cgroup_exit(struct task_struct *p);

int cgroup_init_early(void);
int cgroup_init(void);

/*
 * Iteration helpers and macros.
 */

struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
        struct cgroup_subsys_state *parent);
struct cgroup_subsys_state *css_next_descendant_pre(struct cgroup_subsys_state *pos,
        struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_rightmost_descendant(struct cgroup_subsys_state *pos);
struct cgroup_subsys_state *css_next_descendant_post(struct cgroup_subsys_state *pos,
        struct cgroup_subsys_state *css);

struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset);
struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset);

void css_task_iter_start(struct cgroup_subsys_state *css,
        struct css_task_iter *it);
struct task_struct *css_task_iter_next(struct css_task_iter *it);
void css_task_iter_end(struct css_task_iter *it);

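/*
 * Example (illustrative sketch, not part of the original header): walking
 * every task attached to a css with the iterator declared above. The
 * function name and the counting are hypothetical.
 *
 *	static int my_count_css_tasks(struct cgroup_subsys_state *css)
 *	{
 *		struct css_task_iter it;
 *		struct task_struct *task;
 *		int n = 0;
 *
 *		css_task_iter_start(css, &it);
 *		while ((task = css_task_iter_next(&it)))
 *			n++;
 *		css_task_iter_end(&it);
 *		return n;
 *	}
 */
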
/**
 * css_for_each_child - iterate through children of a css
 * @pos: the css * to use as the loop cursor
 * @parent: css whose children to walk
 *
 * Walk @parent's children. Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal. It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * It is allowed to temporarily drop RCU read lock during iteration. The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_child(pos, parent)                                 \
        for ((pos) = css_next_child(NULL, (parent)); (pos);            \
             (pos) = css_next_child((pos), (parent)))

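/*
 * Example (illustrative sketch): visiting the direct children of @parent
 * under the required RCU read lock. "my_visit_child" is a hypothetical
 * helper.
 *
 *	struct cgroup_subsys_state *child;
 *
 *	rcu_read_lock();
 *	css_for_each_child(child, parent)
 *		my_visit_child(child);
 *	rcu_read_unlock();
 */
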
/**
 * css_for_each_descendant_pre - pre-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css whose descendants to walk
 *
 * Walk @css's descendants. @css is included in the iteration and the
 * first node to be visited. Must be called under rcu_read_lock().
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal. It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * For example, the following guarantees that a descendant can't escape
 * state updates of its ancestors.
 *
 * my_online(@css)
 * {
 *	Lock @css's parent and @css;
 *	Inherit state from the parent;
 *	Unlock both.
 * }
 *
 * my_update_state(@css)
 * {
 *	css_for_each_descendant_pre(@pos, @css) {
 *		Lock @pos;
 *		if (@pos == @css)
 *			Update @css's state;
 *		else
 *			Verify @pos is alive and inherit state from its parent;
 *		Unlock @pos;
 *	}
 * }
 *
 * As long as the inheriting step, including checking the parent state, is
 * enclosed inside @pos locking, double-locking the parent isn't necessary
 * while inheriting. The state update to the parent is guaranteed to be
 * visible by walking order and, as long as inheriting operations to the
 * same @pos are atomic to each other, multiple updates racing each other
 * still result in the correct state. It's guaranteed that at least one
 * inheritance happens for any css after the latest update to its parent.
 *
 * If checking parent's state requires locking the parent, each inheriting
 * iteration should lock and unlock both @pos->parent and @pos.
 *
 * Alternatively, a subsystem may choose to use a single global lock to
 * synchronize ->css_online() and ->css_offline() against tree-walking
 * operations.
 *
 * It is allowed to temporarily drop RCU read lock during iteration. The
 * caller is responsible for ensuring that @pos remains accessible until
 * the start of the next iteration by, for example, bumping the css refcnt.
 */
#define css_for_each_descendant_pre(pos, css)                           \
        for ((pos) = css_next_descendant_pre(NULL, (css)); (pos);      \
             (pos) = css_next_descendant_pre((pos), (css)))

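/*
 * Example (illustrative sketch): a pre-order walk under rcu_read_lock(),
 * as required above. "root_css" and "my_refresh" are hypothetical.
 *
 *	struct cgroup_subsys_state *pos;
 *
 *	rcu_read_lock();
 *	css_for_each_descendant_pre(pos, root_css)
 *		my_refresh(pos);
 *	rcu_read_unlock();
 */
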
/**
 * css_for_each_descendant_post - post-order walk of a css's descendants
 * @pos: the css * to use as the loop cursor
 * @css: css whose descendants to walk
 *
 * Similar to css_for_each_descendant_pre() but performs post-order
 * traversal instead. @css is included in the iteration and the last
 * node to be visited.
 *
 * If a subsystem synchronizes ->css_online() and the start of iteration, a
 * css which finished ->css_online() is guaranteed to be visible in the
 * future iterations and will stay visible until the last reference is put.
 * A css which hasn't finished ->css_online() or already finished
 * ->css_offline() may show up during traversal. It's each subsystem's
 * responsibility to synchronize against on/offlining.
 *
 * Note that the walk visibility guarantee example described for the
 * pre-order walk does not apply the same way to post-order walks.
 */
#define css_for_each_descendant_post(pos, css)                          \
        for ((pos) = css_next_descendant_post(NULL, (css)); (pos);     \
             (pos) = css_next_descendant_post((pos), (css)))

/**
 * cgroup_taskset_for_each - iterate cgroup_taskset
 * @task: the loop cursor
 * @tset: taskset to iterate
 *
 * @tset may contain multiple tasks and they may belong to multiple
 * processes. When there are multiple tasks in @tset, if a task of a
 * process is in @tset, all tasks of the process are in @tset. Also, all
 * are guaranteed to share the same source and destination csses.
 *
 * Iteration is not in any specific order.
 */
#define cgroup_taskset_for_each(task, tset)                             \
        for ((task) = cgroup_taskset_first((tset)); (task);            \
             (task) = cgroup_taskset_next((tset)))

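/*
 * Example (illustrative sketch): iterating every migrating task in a
 * taskset, e.g. from a subsystem's attach path. "my_charge_task" is a
 * hypothetical helper.
 *
 *	struct task_struct *task;
 *
 *	cgroup_taskset_for_each(task, tset)
 *		my_charge_task(task);
 */
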
/**
 * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset
 * @leader: the loop cursor
 * @tset: taskset to iterate
 *
 * Iterate threadgroup leaders of @tset. For single-task migrations, @tset
 * may not contain any.
 */
#define cgroup_taskset_for_each_leader(leader, tset)                    \
        for ((leader) = cgroup_taskset_first((tset)); (leader);        \
             (leader) = cgroup_taskset_next((tset)))                   \
                if ((leader) != (leader)->group_leader)                 \
                        ;                                               \
                else

/*
 * Inline functions.
 */

/**
 * css_get - obtain a reference on the specified css
 * @css: target css
 *
 * The caller must already have a reference.
 */
static inline void css_get(struct cgroup_subsys_state *css)
{
        if (!(css->flags & CSS_NO_REF))
                percpu_ref_get(&css->refcnt);
}

/**
 * css_get_many - obtain references on the specified css
 * @css: target css
 * @n: number of references to get
 *
 * The caller must already have a reference.
 */
static inline void css_get_many(struct cgroup_subsys_state *css, unsigned int n)
{
        if (!(css->flags & CSS_NO_REF))
                percpu_ref_get_many(&css->refcnt, n);
}

/**
 * css_tryget - try to obtain a reference on the specified css
 * @css: target css
 *
 * Obtain a reference on @css unless it already has reached zero and is
 * being released. This function doesn't care whether @css is on or
 * offline. The caller naturally needs to ensure that @css is accessible
 * but doesn't have to be holding a reference on it - IOW, RCU protected
 * access is good enough for this function. Returns %true if a reference
 * count was successfully obtained; %false otherwise.
 */
static inline bool css_tryget(struct cgroup_subsys_state *css)
{
        if (!(css->flags & CSS_NO_REF))
                return percpu_ref_tryget(&css->refcnt);
        return true;
}

/**
 * css_tryget_online - try to obtain a reference on the specified css if online
 * @css: target css
 *
 * Obtain a reference on @css if it's online. The caller naturally needs
 * to ensure that @css is accessible but doesn't have to be holding a
 * reference on it - IOW, RCU protected access is good enough for this
 * function. Returns %true if a reference count was successfully obtained;
 * %false otherwise.
 */
static inline bool css_tryget_online(struct cgroup_subsys_state *css)
{
        if (!(css->flags & CSS_NO_REF))
                return percpu_ref_tryget_live(&css->refcnt);
        return true;
}

/**
 * css_put - put a css reference
 * @css: target css
 *
 * Put a reference obtained via css_get() and css_tryget_online().
 */
static inline void css_put(struct cgroup_subsys_state *css)
{
        if (!(css->flags & CSS_NO_REF))
                percpu_ref_put(&css->refcnt);
}

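/*
 * Example (illustrative sketch): the usual pairing of the helpers above.
 * @css is assumed to have been located under RCU protection or via an
 * existing reference; "my_use_css" is a hypothetical consumer.
 *
 *	if (css_tryget_online(css)) {
 *		my_use_css(css);
 *		css_put(css);
 *	}
 */
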
/**
 * css_put_many - put css references
 * @css: target css
 * @n: number of references to put
 *
 * Put references obtained via css_get() and css_tryget_online().
 */
static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n)
{
        if (!(css->flags & CSS_NO_REF))
                percpu_ref_put_many(&css->refcnt, n);
}

/**
 * task_css_set_check - obtain a task's css_set with extra access conditions
 * @task: the task to obtain css_set for
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * A task's css_set is RCU protected, initialized and exited while holding
 * task_lock(), and can only be modified while holding both cgroup_mutex
 * and task_lock() while the task is alive. This macro verifies that the
 * caller is inside proper critical section and returns @task's css_set.
 *
 * The caller can also specify additional allowed conditions via @__c, such
 * as locks used during the cgroup_subsys::attach() methods.
 */
#ifdef CONFIG_PROVE_RCU
extern struct mutex cgroup_mutex;
extern struct rw_semaphore css_set_rwsem;
#define task_css_set_check(task, __c)                                   \
        rcu_dereference_check((task)->cgroups,                          \
                lockdep_is_held(&cgroup_mutex) ||                       \
                lockdep_is_held(&css_set_rwsem) ||                      \
                ((task)->flags & PF_EXITING) || (__c))
#else
#define task_css_set_check(task, __c)                                   \
        rcu_dereference((task)->cgroups)
#endif

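/*
 * Example (illustrative sketch): a caller that serializes cgroup changes
 * with its own lock can pass that fact through @__c so lockdep accepts the
 * dereference; "my_lock" is a hypothetical lock.
 *
 *	struct css_set *cset;
 *
 *	cset = task_css_set_check(task, lockdep_is_held(&my_lock));
 */
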
/**
 * task_css_check - obtain css for (task, subsys) w/ extra access conds
 * @task: the target task
 * @subsys_id: the target subsystem ID
 * @__c: extra condition expression to be passed to rcu_dereference_check()
 *
 * Return the cgroup_subsys_state for the (@task, @subsys_id) pair. The
 * synchronization rules are the same as task_css_set_check().
 */
#define task_css_check(task, subsys_id, __c)                            \
        task_css_set_check((task), (__c))->subsys[(subsys_id)]

/**
 * task_css_set - obtain a task's css_set
 * @task: the task to obtain css_set for
 *
 * See task_css_set_check().
 */
static inline struct css_set *task_css_set(struct task_struct *task)
{
        return task_css_set_check(task, false);
}

/**
 * task_css - obtain css for (task, subsys)
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * See task_css_check().
 */
static inline struct cgroup_subsys_state *task_css(struct task_struct *task,
        int subsys_id)
{
        return task_css_check(task, subsys_id, false);
}

/**
 * task_get_css - find and get the css for (task, subsys)
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * Find the css for the (@task, @subsys_id) combination, increment a
 * reference on and return it. This function is guaranteed to return a
 * valid css.
 */
static inline struct cgroup_subsys_state *
task_get_css(struct task_struct *task, int subsys_id)
{
        struct cgroup_subsys_state *css;

        rcu_read_lock();
        while (true) {
                css = task_css(task, subsys_id);
                if (likely(css_tryget_online(css)))
                        break;
                cpu_relax();
        }
        rcu_read_unlock();
        return css;
}

/**
 * task_css_is_root - test whether a task belongs to the root css
 * @task: the target task
 * @subsys_id: the target subsystem ID
 *
 * Test whether @task belongs to the root css on the specified subsystem.
 * May be invoked in any context.
 */
static inline bool task_css_is_root(struct task_struct *task, int subsys_id)
{
        return task_css_check(task, subsys_id, true) ==
                init_css_set.subsys[subsys_id];
}

static inline struct cgroup *task_cgroup(struct task_struct *task,
        int subsys_id)
{
        return task_css(task, subsys_id)->cgroup;
}

/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_is_populated(struct cgroup *cgrp)
{
        return cgrp->populated_cnt;
}

/* returns ino associated with a cgroup */
static inline ino_t cgroup_ino(struct cgroup *cgrp)
{
        return cgrp->kn->ino;
}

/* cft/css accessors for cftype->write() operation */
static inline struct cftype *of_cft(struct kernfs_open_file *of)
{
        return of->kn->priv;
}

struct cgroup_subsys_state *of_css(struct kernfs_open_file *of);

/* cft/css accessors for cftype->seq_*() operations */
static inline struct cftype *seq_cft(struct seq_file *seq)
{
        return of_cft(seq->private);
}

static inline struct cgroup_subsys_state *seq_css(struct seq_file *seq)
{
        return of_css(seq->private);
}

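/*
 * Example (illustrative sketch): a hypothetical cftype->seq_show()
 * implementation recovering the owning css via seq_css(); the my_css()
 * container helper and the "my_counter" field are made up.
 *
 *	static int my_stat_show(struct seq_file *seq, void *v)
 *	{
 *		struct cgroup_subsys_state *css = seq_css(seq);
 *
 *		seq_printf(seq, "%llu\n", my_css(css)->my_counter);
 *		return 0;
 *	}
 */
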
/*
 * Name / path handling functions. All are thin wrappers around the kernfs
 * counterparts and can be called under any context.
 */

static inline int cgroup_name(struct cgroup *cgrp, char *buf, size_t buflen)
{
        return kernfs_name(cgrp->kn, buf, buflen);
}

static inline char * __must_check cgroup_path(struct cgroup *cgrp, char *buf,
        size_t buflen)
{
        return kernfs_path(cgrp->kn, buf, buflen);
}

static inline void pr_cont_cgroup_name(struct cgroup *cgrp)
{
        pr_cont_kernfs_name(cgrp->kn);
}

static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
{
        pr_cont_kernfs_path(cgrp->kn);
}

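/*
 * Example (illustrative sketch): printing a cgroup's path into a caller
 * supplied buffer. The allocation strategy is hypothetical; cgroup_path()
 * returns NULL when the buffer is too small.
 *
 *	char *buf = kmalloc(PATH_MAX, GFP_KERNEL);
 *
 *	if (buf && cgroup_path(cgrp, buf, PATH_MAX))
 *		pr_info("cgroup: %s\n", buf);
 *	kfree(buf);
 */
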
/**
 * cgroup_file_notify - generate a file modified event for a cgroup_file
 * @cfile: target cgroup_file
 *
 * @cfile must have been obtained by setting cftype->file_offset.
 */
static inline void cgroup_file_notify(struct cgroup_file *cfile)
{
        /* might not have been created due to one of the CFTYPE selector flags */
        if (cfile->kn)
                kernfs_notify(cfile->kn);
}

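/*
 * Example (illustrative sketch): a subsystem that embeds a cgroup_file in
 * its per-css state (populated via cftype->file_offset) can wake pollers
 * of that file when an event fires. "struct my_css" and "events_file" are
 * hypothetical.
 *
 *	struct my_css {
 *		struct cgroup_subsys_state css;
 *		struct cgroup_file events_file;
 *	};
 *
 *	static void my_event_occurred(struct my_css *mcs)
 *	{
 *		cgroup_file_notify(&mcs->events_file);
 *	}
 */
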
#else /* !CONFIG_CGROUPS */

struct cgroup_subsys_state;

static inline void css_put(struct cgroup_subsys_state *css) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
        struct task_struct *t) { return 0; }
static inline int cgroupstats_build(struct cgroupstats *stats,
        struct dentry *dentry) { return -EINVAL; }

static inline void cgroup_fork(struct task_struct *p) {}
static inline int cgroup_can_fork(struct task_struct *p,
        void *ss_priv[CGROUP_CANFORK_COUNT])
{ return 0; }
static inline void cgroup_cancel_fork(struct task_struct *p,
        void *ss_priv[CGROUP_CANFORK_COUNT]) {}
static inline void cgroup_post_fork(struct task_struct *p,
        void *ss_priv[CGROUP_CANFORK_COUNT]) {}
static inline void cgroup_exit(struct task_struct *p) {}

static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }

#endif /* !CONFIG_CGROUPS */

#endif /* _LINUX_CGROUP_H */