]>
Commit | Line | Data |
---|---|---|
2e76c24d LZ |
1 | #include <linux/cgroup.h> |
2 | #include <linux/slab.h> | |
3 | #include <linux/percpu.h> | |
4 | #include <linux/spinlock.h> | |
5 | #include <linux/cpumask.h> | |
6 | #include <linux/seq_file.h> | |
7 | #include <linux/rcupdate.h> | |
8 | #include <linux/kernel_stat.h> | |
b329fd5b | 9 | #include <linux/err.h> |
2e76c24d LZ |
10 | |
11 | #include "sched.h" | |
12 | ||
13 | /* | |
14 | * CPU accounting code for task groups. | |
15 | * | |
16 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
17 | * (balbir@in.ibm.com). | |
18 | */ | |
19 | ||
d1712796 LZ |
/*
 * Indices naming the kinds of execution time reported for the tasks of a
 * cpu accounting group.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER = 0,	/* time spent executing in user mode */
	CPUACCT_STAT_SYSTEM,	/* time spent executing in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of statistics above; keep last */
};
27 | ||
28 | /* track cpu usage of a group of tasks and its child groups */ | |
29 | struct cpuacct { | |
30 | struct cgroup_subsys_state css; | |
31 | /* cpuusage holds pointer to a u64-type object on every cpu */ | |
32 | u64 __percpu *cpuusage; | |
33 | struct kernel_cpustat __percpu *cpustat; | |
34 | }; | |
35 | ||
36 | /* return cpu accounting group corresponding to this container */ | |
37 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | |
38 | { | |
39 | return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id), | |
40 | struct cpuacct, css); | |
41 | } | |
42 | ||
43 | /* return cpu accounting group to which this task belongs */ | |
44 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | |
45 | { | |
46 | return container_of(task_subsys_state(tsk, cpuacct_subsys_id), | |
47 | struct cpuacct, css); | |
48 | } | |
49 | ||
50 | static inline struct cpuacct *__parent_ca(struct cpuacct *ca) | |
51 | { | |
52 | return cgroup_ca(ca->css.cgroup->parent); | |
53 | } | |
54 | ||
55 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) | |
56 | { | |
57 | if (!ca->css.cgroup->parent) | |
58 | return NULL; | |
59 | return cgroup_ca(ca->css.cgroup->parent); | |
60 | } | |
61 | ||
7943e15a | 62 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
63 | static struct cpuacct root_cpuacct = { |
64 | .cpustat = &kernel_cpustat, | |
65 | .cpuusage = &root_cpuacct_cpuusage, | |
66 | }; | |
2e76c24d LZ |
67 | |
68 | /* create a new cpu accounting group */ | |
69 | static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) | |
70 | { | |
71 | struct cpuacct *ca; | |
72 | ||
73 | if (!cgrp->parent) | |
74 | return &root_cpuacct.css; | |
75 | ||
76 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
77 | if (!ca) | |
78 | goto out; | |
79 | ||
80 | ca->cpuusage = alloc_percpu(u64); | |
81 | if (!ca->cpuusage) | |
82 | goto out_free_ca; | |
83 | ||
84 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
85 | if (!ca->cpustat) | |
86 | goto out_free_cpuusage; | |
87 | ||
88 | return &ca->css; | |
89 | ||
90 | out_free_cpuusage: | |
91 | free_percpu(ca->cpuusage); | |
92 | out_free_ca: | |
93 | kfree(ca); | |
94 | out: | |
95 | return ERR_PTR(-ENOMEM); | |
96 | } | |
97 | ||
98 | /* destroy an existing cpu accounting group */ | |
99 | static void cpuacct_css_free(struct cgroup *cgrp) | |
100 | { | |
101 | struct cpuacct *ca = cgroup_ca(cgrp); | |
102 | ||
103 | free_percpu(ca->cpustat); | |
104 | free_percpu(ca->cpuusage); | |
105 | kfree(ca); | |
106 | } | |
107 | ||
108 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |
109 | { | |
110 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
111 | u64 data; | |
112 | ||
113 | #ifndef CONFIG_64BIT | |
114 | /* | |
115 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
116 | */ | |
117 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
118 | data = *cpuusage; | |
119 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
120 | #else | |
121 | data = *cpuusage; | |
122 | #endif | |
123 | ||
124 | return data; | |
125 | } | |
126 | ||
127 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |
128 | { | |
129 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
130 | ||
131 | #ifndef CONFIG_64BIT | |
132 | /* | |
133 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
134 | */ | |
135 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
136 | *cpuusage = val; | |
137 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
138 | #else | |
139 | *cpuusage = val; | |
140 | #endif | |
141 | } | |
142 | ||
143 | /* return total cpu usage (in nanoseconds) of a group */ | |
144 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | |
145 | { | |
146 | struct cpuacct *ca = cgroup_ca(cgrp); | |
147 | u64 totalcpuusage = 0; | |
148 | int i; | |
149 | ||
150 | for_each_present_cpu(i) | |
151 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | |
152 | ||
153 | return totalcpuusage; | |
154 | } | |
155 | ||
156 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | |
157 | u64 reset) | |
158 | { | |
159 | struct cpuacct *ca = cgroup_ca(cgrp); | |
160 | int err = 0; | |
161 | int i; | |
162 | ||
163 | if (reset) { | |
164 | err = -EINVAL; | |
165 | goto out; | |
166 | } | |
167 | ||
168 | for_each_present_cpu(i) | |
169 | cpuacct_cpuusage_write(ca, i, 0); | |
170 | ||
171 | out: | |
172 | return err; | |
173 | } | |
174 | ||
175 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |
176 | struct seq_file *m) | |
177 | { | |
178 | struct cpuacct *ca = cgroup_ca(cgroup); | |
179 | u64 percpu; | |
180 | int i; | |
181 | ||
182 | for_each_present_cpu(i) { | |
183 | percpu = cpuacct_cpuusage_read(ca, i); | |
184 | seq_printf(m, "%llu ", (unsigned long long) percpu); | |
185 | } | |
186 | seq_printf(m, "\n"); | |
187 | return 0; | |
188 | } | |
189 | ||
190 | static const char * const cpuacct_stat_desc[] = { | |
191 | [CPUACCT_STAT_USER] = "user", | |
192 | [CPUACCT_STAT_SYSTEM] = "system", | |
193 | }; | |
194 | ||
195 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | |
196 | struct cgroup_map_cb *cb) | |
197 | { | |
198 | struct cpuacct *ca = cgroup_ca(cgrp); | |
199 | int cpu; | |
200 | s64 val = 0; | |
201 | ||
202 | for_each_online_cpu(cpu) { | |
203 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
204 | val += kcpustat->cpustat[CPUTIME_USER]; | |
205 | val += kcpustat->cpustat[CPUTIME_NICE]; | |
206 | } | |
207 | val = cputime64_to_clock_t(val); | |
208 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | |
209 | ||
210 | val = 0; | |
211 | for_each_online_cpu(cpu) { | |
212 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
213 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | |
214 | val += kcpustat->cpustat[CPUTIME_IRQ]; | |
215 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | |
216 | } | |
217 | ||
218 | val = cputime64_to_clock_t(val); | |
219 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | |
220 | ||
221 | return 0; | |
222 | } | |
223 | ||
224 | static struct cftype files[] = { | |
225 | { | |
226 | .name = "usage", | |
227 | .read_u64 = cpuusage_read, | |
228 | .write_u64 = cpuusage_write, | |
229 | }, | |
230 | { | |
231 | .name = "usage_percpu", | |
232 | .read_seq_string = cpuacct_percpu_seq_read, | |
233 | }, | |
234 | { | |
235 | .name = "stat", | |
236 | .read_map = cpuacct_stats_show, | |
237 | }, | |
238 | { } /* terminate */ | |
239 | }; | |
240 | ||
241 | /* | |
242 | * charge this task's execution time to its accounting group. | |
243 | * | |
244 | * called with rq->lock held. | |
245 | */ | |
246 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
247 | { | |
248 | struct cpuacct *ca; | |
249 | int cpu; | |
250 | ||
2e76c24d LZ |
251 | cpu = task_cpu(tsk); |
252 | ||
253 | rcu_read_lock(); | |
254 | ||
255 | ca = task_ca(tsk); | |
256 | ||
543bc0e7 | 257 | while (true) { |
2e76c24d LZ |
258 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
259 | *cpuusage += cputime; | |
543bc0e7 LZ |
260 | |
261 | ca = parent_ca(ca); | |
262 | if (!ca) | |
263 | break; | |
2e76c24d LZ |
264 | } |
265 | ||
266 | rcu_read_unlock(); | |
267 | } | |
268 | ||
1966aaf7 LZ |
269 | /* |
270 | * Add user/system time to cpuacct. | |
271 | * | |
272 | * Note: it's the caller that updates the account of the root cgroup. | |
273 | */ | |
274 | void cpuacct_account_field(struct task_struct *p, int index, u64 val) | |
275 | { | |
276 | struct kernel_cpustat *kcpustat; | |
277 | struct cpuacct *ca; | |
278 | ||
1966aaf7 LZ |
279 | rcu_read_lock(); |
280 | ca = task_ca(p); | |
5f40d804 | 281 | while (ca != &root_cpuacct) { |
1966aaf7 LZ |
282 | kcpustat = this_cpu_ptr(ca->cpustat); |
283 | kcpustat->cpustat[index] += val; | |
5f40d804 | 284 | ca = __parent_ca(ca); |
1966aaf7 LZ |
285 | } |
286 | rcu_read_unlock(); | |
287 | } | |
288 | ||
2e76c24d | 289 | struct cgroup_subsys cpuacct_subsys = { |
621e2de0 LZ |
290 | .name = "cpuacct", |
291 | .css_alloc = cpuacct_css_alloc, | |
292 | .css_free = cpuacct_css_free, | |
293 | .subsys_id = cpuacct_subsys_id, | |
294 | .base_cftypes = files, | |
295 | .early_init = 1, | |
2e76c24d | 296 | }; |