]>
Commit | Line | Data |
---|---|---|
2e76c24d LZ |
1 | #include <linux/cgroup.h> |
2 | #include <linux/slab.h> | |
3 | #include <linux/percpu.h> | |
4 | #include <linux/spinlock.h> | |
5 | #include <linux/cpumask.h> | |
6 | #include <linux/seq_file.h> | |
7 | #include <linux/rcupdate.h> | |
8 | #include <linux/kernel_stat.h> | |
b329fd5b | 9 | #include <linux/err.h> |
2e76c24d LZ |
10 | |
11 | #include "sched.h" | |
12 | ||
13 | /* | |
14 | * CPU accounting code for task groups. | |
15 | * | |
16 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
17 | * (balbir@in.ibm.com). | |
18 | */ | |
19 | ||
d1712796 LZ |
/*
 * Categories of CPU time tracked for each accounting group.
 *
 * CPUACCT_STAT_NSTATS is the number of categories; it is not a category
 * itself.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	CPUACCT_STAT_NSTATS,	/* count of the categories above */
};
27 | ||
9acacc2a ZL |
28 | static const char * const cpuacct_stat_desc[] = { |
29 | [CPUACCT_STAT_USER] = "user", | |
30 | [CPUACCT_STAT_SYSTEM] = "system", | |
d740037f DY |
31 | }; |
32 | ||
33 | struct cpuacct_usage { | |
9acacc2a | 34 | u64 usages[CPUACCT_STAT_NSTATS]; |
d740037f DY |
35 | }; |
36 | ||
d1712796 LZ |
37 | /* track cpu usage of a group of tasks and its child groups */ |
38 | struct cpuacct { | |
39 | struct cgroup_subsys_state css; | |
40 | /* cpuusage holds pointer to a u64-type object on every cpu */ | |
d740037f | 41 | struct cpuacct_usage __percpu *cpuusage; |
d1712796 LZ |
42 | struct kernel_cpustat __percpu *cpustat; |
43 | }; | |
44 | ||
a7c6d554 TH |
45 | static inline struct cpuacct *css_ca(struct cgroup_subsys_state *css) |
46 | { | |
47 | return css ? container_of(css, struct cpuacct, css) : NULL; | |
48 | } | |
49 | ||
d1712796 LZ |
50 | /* return cpu accounting group to which this task belongs */ |
51 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | |
52 | { | |
073219e9 | 53 | return css_ca(task_css(tsk, cpuacct_cgrp_id)); |
d1712796 LZ |
54 | } |
55 | ||
d1712796 LZ |
56 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) |
57 | { | |
5c9d535b | 58 | return css_ca(ca->css.parent); |
d1712796 LZ |
59 | } |
60 | ||
d740037f | 61 | static DEFINE_PER_CPU(struct cpuacct_usage, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
62 | static struct cpuacct root_cpuacct = { |
63 | .cpustat = &kernel_cpustat, | |
64 | .cpuusage = &root_cpuacct_cpuusage, | |
65 | }; | |
2e76c24d LZ |
66 | |
67 | /* create a new cpu accounting group */ | |
eb95419b TH |
68 | static struct cgroup_subsys_state * |
69 | cpuacct_css_alloc(struct cgroup_subsys_state *parent_css) | |
2e76c24d LZ |
70 | { |
71 | struct cpuacct *ca; | |
72 | ||
eb95419b | 73 | if (!parent_css) |
2e76c24d LZ |
74 | return &root_cpuacct.css; |
75 | ||
76 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
77 | if (!ca) | |
78 | goto out; | |
79 | ||
d740037f | 80 | ca->cpuusage = alloc_percpu(struct cpuacct_usage); |
2e76c24d LZ |
81 | if (!ca->cpuusage) |
82 | goto out_free_ca; | |
83 | ||
84 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
85 | if (!ca->cpustat) | |
86 | goto out_free_cpuusage; | |
87 | ||
88 | return &ca->css; | |
89 | ||
90 | out_free_cpuusage: | |
91 | free_percpu(ca->cpuusage); | |
92 | out_free_ca: | |
93 | kfree(ca); | |
94 | out: | |
95 | return ERR_PTR(-ENOMEM); | |
96 | } | |
97 | ||
98 | /* destroy an existing cpu accounting group */ | |
eb95419b | 99 | static void cpuacct_css_free(struct cgroup_subsys_state *css) |
2e76c24d | 100 | { |
eb95419b | 101 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
102 | |
103 | free_percpu(ca->cpustat); | |
104 | free_percpu(ca->cpuusage); | |
105 | kfree(ca); | |
106 | } | |
107 | ||
d740037f | 108 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu, |
9acacc2a | 109 | enum cpuacct_stat_index index) |
2e76c24d | 110 | { |
d740037f | 111 | struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
2e76c24d LZ |
112 | u64 data; |
113 | ||
d740037f | 114 | /* |
9acacc2a | 115 | * We allow index == CPUACCT_STAT_NSTATS here to read |
d740037f DY |
116 | * the sum of suages. |
117 | */ | |
9acacc2a | 118 | BUG_ON(index > CPUACCT_STAT_NSTATS); |
d740037f | 119 | |
2e76c24d LZ |
120 | #ifndef CONFIG_64BIT |
121 | /* | |
122 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
123 | */ | |
124 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
d740037f DY |
125 | #endif |
126 | ||
9acacc2a | 127 | if (index == CPUACCT_STAT_NSTATS) { |
d740037f DY |
128 | int i = 0; |
129 | ||
130 | data = 0; | |
9acacc2a | 131 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
d740037f DY |
132 | data += cpuusage->usages[i]; |
133 | } else { | |
134 | data = cpuusage->usages[index]; | |
135 | } | |
136 | ||
137 | #ifndef CONFIG_64BIT | |
2e76c24d | 138 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
2e76c24d LZ |
139 | #endif |
140 | ||
141 | return data; | |
142 | } | |
143 | ||
144 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |
145 | { | |
d740037f DY |
146 | struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
147 | int i; | |
2e76c24d LZ |
148 | |
149 | #ifndef CONFIG_64BIT | |
150 | /* | |
151 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
152 | */ | |
153 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
d740037f DY |
154 | #endif |
155 | ||
9acacc2a | 156 | for (i = 0; i < CPUACCT_STAT_NSTATS; i++) |
d740037f DY |
157 | cpuusage->usages[i] = val; |
158 | ||
159 | #ifndef CONFIG_64BIT | |
2e76c24d | 160 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
2e76c24d LZ |
161 | #endif |
162 | } | |
163 | ||
164 | /* return total cpu usage (in nanoseconds) of a group */ | |
d740037f | 165 | static u64 __cpuusage_read(struct cgroup_subsys_state *css, |
9acacc2a | 166 | enum cpuacct_stat_index index) |
2e76c24d | 167 | { |
182446d0 | 168 | struct cpuacct *ca = css_ca(css); |
2e76c24d LZ |
169 | u64 totalcpuusage = 0; |
170 | int i; | |
171 | ||
5ca3726a | 172 | for_each_possible_cpu(i) |
d740037f | 173 | totalcpuusage += cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
174 | |
175 | return totalcpuusage; | |
176 | } | |
177 | ||
d740037f DY |
178 | static u64 cpuusage_user_read(struct cgroup_subsys_state *css, |
179 | struct cftype *cft) | |
180 | { | |
9acacc2a | 181 | return __cpuusage_read(css, CPUACCT_STAT_USER); |
d740037f DY |
182 | } |
183 | ||
184 | static u64 cpuusage_sys_read(struct cgroup_subsys_state *css, | |
185 | struct cftype *cft) | |
186 | { | |
9acacc2a | 187 | return __cpuusage_read(css, CPUACCT_STAT_SYSTEM); |
d740037f DY |
188 | } |
189 | ||
190 | static u64 cpuusage_read(struct cgroup_subsys_state *css, struct cftype *cft) | |
191 | { | |
9acacc2a | 192 | return __cpuusage_read(css, CPUACCT_STAT_NSTATS); |
d740037f DY |
193 | } |
194 | ||
182446d0 | 195 | static int cpuusage_write(struct cgroup_subsys_state *css, struct cftype *cft, |
1a736b77 | 196 | u64 val) |
2e76c24d | 197 | { |
182446d0 | 198 | struct cpuacct *ca = css_ca(css); |
d740037f | 199 | int cpu; |
2e76c24d | 200 | |
1a736b77 DY |
201 | /* |
202 | * Only allow '0' here to do a reset. | |
203 | */ | |
d740037f DY |
204 | if (val) |
205 | return -EINVAL; | |
2e76c24d | 206 | |
d740037f DY |
207 | for_each_possible_cpu(cpu) |
208 | cpuacct_cpuusage_write(ca, cpu, 0); | |
2e76c24d | 209 | |
d740037f | 210 | return 0; |
2e76c24d LZ |
211 | } |
212 | ||
d740037f | 213 | static int __cpuacct_percpu_seq_show(struct seq_file *m, |
9acacc2a | 214 | enum cpuacct_stat_index index) |
2e76c24d | 215 | { |
2da8ca82 | 216 | struct cpuacct *ca = css_ca(seq_css(m)); |
2e76c24d LZ |
217 | u64 percpu; |
218 | int i; | |
219 | ||
5ca3726a | 220 | for_each_possible_cpu(i) { |
d740037f | 221 | percpu = cpuacct_cpuusage_read(ca, i, index); |
2e76c24d LZ |
222 | seq_printf(m, "%llu ", (unsigned long long) percpu); |
223 | } | |
224 | seq_printf(m, "\n"); | |
225 | return 0; | |
226 | } | |
227 | ||
d740037f DY |
228 | static int cpuacct_percpu_user_seq_show(struct seq_file *m, void *V) |
229 | { | |
9acacc2a | 230 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_USER); |
d740037f DY |
231 | } |
232 | ||
233 | static int cpuacct_percpu_sys_seq_show(struct seq_file *m, void *V) | |
234 | { | |
9acacc2a | 235 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_SYSTEM); |
d740037f DY |
236 | } |
237 | ||
238 | static int cpuacct_percpu_seq_show(struct seq_file *m, void *V) | |
239 | { | |
9acacc2a | 240 | return __cpuacct_percpu_seq_show(m, CPUACCT_STAT_NSTATS); |
d740037f DY |
241 | } |
242 | ||
277a13e4 ZL |
243 | static int cpuacct_all_seq_show(struct seq_file *m, void *V) |
244 | { | |
245 | struct cpuacct *ca = css_ca(seq_css(m)); | |
246 | int index; | |
247 | int cpu; | |
248 | ||
249 | seq_puts(m, "cpu"); | |
250 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) | |
251 | seq_printf(m, " %s", cpuacct_stat_desc[index]); | |
252 | seq_puts(m, "\n"); | |
253 | ||
254 | for_each_possible_cpu(cpu) { | |
255 | struct cpuacct_usage *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
256 | ||
257 | seq_printf(m, "%d", cpu); | |
258 | ||
259 | for (index = 0; index < CPUACCT_STAT_NSTATS; index++) { | |
260 | #ifndef CONFIG_64BIT | |
261 | /* | |
262 | * Take rq->lock to make 64-bit read safe on 32-bit | |
263 | * platforms. | |
264 | */ | |
265 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
266 | #endif | |
267 | ||
268 | seq_printf(m, " %llu", cpuusage->usages[index]); | |
269 | ||
270 | #ifndef CONFIG_64BIT | |
271 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
272 | #endif | |
273 | } | |
274 | seq_puts(m, "\n"); | |
275 | } | |
276 | return 0; | |
277 | } | |
278 | ||
2da8ca82 | 279 | static int cpuacct_stats_show(struct seq_file *sf, void *v) |
2e76c24d | 280 | { |
2da8ca82 | 281 | struct cpuacct *ca = css_ca(seq_css(sf)); |
8e546bfa | 282 | s64 val[CPUACCT_STAT_NSTATS]; |
2e76c24d | 283 | int cpu; |
8e546bfa | 284 | int stat; |
2e76c24d | 285 | |
8e546bfa | 286 | memset(val, 0, sizeof(val)); |
5ca3726a | 287 | for_each_possible_cpu(cpu) { |
8e546bfa | 288 | u64 *cpustat = per_cpu_ptr(ca->cpustat, cpu)->cpustat; |
2e76c24d | 289 | |
8e546bfa ZL |
290 | val[CPUACCT_STAT_USER] += cpustat[CPUTIME_USER]; |
291 | val[CPUACCT_STAT_USER] += cpustat[CPUTIME_NICE]; | |
292 | val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SYSTEM]; | |
293 | val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_IRQ]; | |
294 | val[CPUACCT_STAT_SYSTEM] += cpustat[CPUTIME_SOFTIRQ]; | |
2e76c24d LZ |
295 | } |
296 | ||
8e546bfa ZL |
297 | for (stat = 0; stat < CPUACCT_STAT_NSTATS; stat++) { |
298 | seq_printf(sf, "%s %lld\n", | |
299 | cpuacct_stat_desc[stat], | |
7fb1327e | 300 | (long long)nsec_to_clock_t(val[stat])); |
8e546bfa | 301 | } |
2e76c24d LZ |
302 | |
303 | return 0; | |
304 | } | |
305 | ||
306 | static struct cftype files[] = { | |
307 | { | |
308 | .name = "usage", | |
309 | .read_u64 = cpuusage_read, | |
310 | .write_u64 = cpuusage_write, | |
311 | }, | |
d740037f DY |
312 | { |
313 | .name = "usage_user", | |
314 | .read_u64 = cpuusage_user_read, | |
315 | }, | |
316 | { | |
317 | .name = "usage_sys", | |
318 | .read_u64 = cpuusage_sys_read, | |
319 | }, | |
2e76c24d LZ |
320 | { |
321 | .name = "usage_percpu", | |
2da8ca82 | 322 | .seq_show = cpuacct_percpu_seq_show, |
2e76c24d | 323 | }, |
d740037f DY |
324 | { |
325 | .name = "usage_percpu_user", | |
326 | .seq_show = cpuacct_percpu_user_seq_show, | |
327 | }, | |
328 | { | |
329 | .name = "usage_percpu_sys", | |
330 | .seq_show = cpuacct_percpu_sys_seq_show, | |
331 | }, | |
277a13e4 ZL |
332 | { |
333 | .name = "usage_all", | |
334 | .seq_show = cpuacct_all_seq_show, | |
335 | }, | |
2e76c24d LZ |
336 | { |
337 | .name = "stat", | |
2da8ca82 | 338 | .seq_show = cpuacct_stats_show, |
2e76c24d LZ |
339 | }, |
340 | { } /* terminate */ | |
341 | }; | |
342 | ||
343 | /* | |
344 | * charge this task's execution time to its accounting group. | |
345 | * | |
346 | * called with rq->lock held. | |
347 | */ | |
348 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
349 | { | |
350 | struct cpuacct *ca; | |
9acacc2a | 351 | int index = CPUACCT_STAT_SYSTEM; |
bd928830 | 352 | struct pt_regs *regs = task_pt_regs(tsk); |
d740037f | 353 | |
bd928830 | 354 | if (regs && user_mode(regs)) |
9acacc2a | 355 | index = CPUACCT_STAT_USER; |
2e76c24d LZ |
356 | |
357 | rcu_read_lock(); | |
d740037f | 358 | |
73e6aafd | 359 | for (ca = task_ca(tsk); ca; ca = parent_ca(ca)) |
d740037f DY |
360 | this_cpu_ptr(ca->cpuusage)->usages[index] += cputime; |
361 | ||
2e76c24d LZ |
362 | rcu_read_unlock(); |
363 | } | |
364 | ||
1966aaf7 LZ |
365 | /* |
366 | * Add user/system time to cpuacct. | |
367 | * | |
368 | * Note: it's the caller that updates the account of the root cgroup. | |
369 | */ | |
73e6aafd | 370 | void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) |
1966aaf7 | 371 | { |
1966aaf7 LZ |
372 | struct cpuacct *ca; |
373 | ||
1966aaf7 | 374 | rcu_read_lock(); |
73e6aafd ZL |
375 | for (ca = task_ca(tsk); ca != &root_cpuacct; ca = parent_ca(ca)) |
376 | this_cpu_ptr(ca->cpustat)->cpustat[index] += val; | |
1966aaf7 LZ |
377 | rcu_read_unlock(); |
378 | } | |
379 | ||
073219e9 | 380 | struct cgroup_subsys cpuacct_cgrp_subsys = { |
621e2de0 LZ |
381 | .css_alloc = cpuacct_css_alloc, |
382 | .css_free = cpuacct_css_free, | |
5577964e | 383 | .legacy_cftypes = files, |
b38e42e9 | 384 | .early_init = true, |
2e76c24d | 385 | }; |