]>
Commit | Line | Data |
---|---|---|
2e76c24d LZ |
1 | #include <linux/cgroup.h> |
2 | #include <linux/slab.h> | |
3 | #include <linux/percpu.h> | |
4 | #include <linux/spinlock.h> | |
5 | #include <linux/cpumask.h> | |
6 | #include <linux/seq_file.h> | |
7 | #include <linux/rcupdate.h> | |
8 | #include <linux/kernel_stat.h> | |
9 | ||
10 | #include "sched.h" | |
11 | ||
12 | /* | |
13 | * CPU accounting code for task groups. | |
14 | * | |
15 | * Based on the work by Paul Menage (menage@google.com) and Balbir Singh | |
16 | * (balbir@in.ibm.com). | |
17 | */ | |
18 | ||
d1712796 LZ |
/*
 * Indices of the per-group time buckets reported by cpuacct: time the
 * group's tasks spent executing in each mode.
 */
enum cpuacct_stat_index {
	CPUACCT_STAT_USER,	/* time in user mode */
	CPUACCT_STAT_SYSTEM,	/* time in kernel mode */

	CPUACCT_STAT_NSTATS,	/* number of buckets above; keep last */
};
26 | ||
27 | /* track cpu usage of a group of tasks and its child groups */ | |
28 | struct cpuacct { | |
29 | struct cgroup_subsys_state css; | |
30 | /* cpuusage holds pointer to a u64-type object on every cpu */ | |
31 | u64 __percpu *cpuusage; | |
32 | struct kernel_cpustat __percpu *cpustat; | |
33 | }; | |
34 | ||
35 | /* return cpu accounting group corresponding to this container */ | |
36 | static inline struct cpuacct *cgroup_ca(struct cgroup *cgrp) | |
37 | { | |
38 | return container_of(cgroup_subsys_state(cgrp, cpuacct_subsys_id), | |
39 | struct cpuacct, css); | |
40 | } | |
41 | ||
42 | /* return cpu accounting group to which this task belongs */ | |
43 | static inline struct cpuacct *task_ca(struct task_struct *tsk) | |
44 | { | |
45 | return container_of(task_subsys_state(tsk, cpuacct_subsys_id), | |
46 | struct cpuacct, css); | |
47 | } | |
48 | ||
49 | static inline struct cpuacct *__parent_ca(struct cpuacct *ca) | |
50 | { | |
51 | return cgroup_ca(ca->css.cgroup->parent); | |
52 | } | |
53 | ||
54 | static inline struct cpuacct *parent_ca(struct cpuacct *ca) | |
55 | { | |
56 | if (!ca->css.cgroup->parent) | |
57 | return NULL; | |
58 | return cgroup_ca(ca->css.cgroup->parent); | |
59 | } | |
60 | ||
7943e15a | 61 | static DEFINE_PER_CPU(u64, root_cpuacct_cpuusage); |
14c6d3c8 LZ |
62 | static struct cpuacct root_cpuacct = { |
63 | .cpustat = &kernel_cpustat, | |
64 | .cpuusage = &root_cpuacct_cpuusage, | |
65 | }; | |
2e76c24d LZ |
66 | |
67 | /* create a new cpu accounting group */ | |
68 | static struct cgroup_subsys_state *cpuacct_css_alloc(struct cgroup *cgrp) | |
69 | { | |
70 | struct cpuacct *ca; | |
71 | ||
72 | if (!cgrp->parent) | |
73 | return &root_cpuacct.css; | |
74 | ||
75 | ca = kzalloc(sizeof(*ca), GFP_KERNEL); | |
76 | if (!ca) | |
77 | goto out; | |
78 | ||
79 | ca->cpuusage = alloc_percpu(u64); | |
80 | if (!ca->cpuusage) | |
81 | goto out_free_ca; | |
82 | ||
83 | ca->cpustat = alloc_percpu(struct kernel_cpustat); | |
84 | if (!ca->cpustat) | |
85 | goto out_free_cpuusage; | |
86 | ||
87 | return &ca->css; | |
88 | ||
89 | out_free_cpuusage: | |
90 | free_percpu(ca->cpuusage); | |
91 | out_free_ca: | |
92 | kfree(ca); | |
93 | out: | |
94 | return ERR_PTR(-ENOMEM); | |
95 | } | |
96 | ||
97 | /* destroy an existing cpu accounting group */ | |
98 | static void cpuacct_css_free(struct cgroup *cgrp) | |
99 | { | |
100 | struct cpuacct *ca = cgroup_ca(cgrp); | |
101 | ||
102 | free_percpu(ca->cpustat); | |
103 | free_percpu(ca->cpuusage); | |
104 | kfree(ca); | |
105 | } | |
106 | ||
107 | static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |
108 | { | |
109 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
110 | u64 data; | |
111 | ||
112 | #ifndef CONFIG_64BIT | |
113 | /* | |
114 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | |
115 | */ | |
116 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
117 | data = *cpuusage; | |
118 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
119 | #else | |
120 | data = *cpuusage; | |
121 | #endif | |
122 | ||
123 | return data; | |
124 | } | |
125 | ||
126 | static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |
127 | { | |
128 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); | |
129 | ||
130 | #ifndef CONFIG_64BIT | |
131 | /* | |
132 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | |
133 | */ | |
134 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); | |
135 | *cpuusage = val; | |
136 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); | |
137 | #else | |
138 | *cpuusage = val; | |
139 | #endif | |
140 | } | |
141 | ||
142 | /* return total cpu usage (in nanoseconds) of a group */ | |
143 | static u64 cpuusage_read(struct cgroup *cgrp, struct cftype *cft) | |
144 | { | |
145 | struct cpuacct *ca = cgroup_ca(cgrp); | |
146 | u64 totalcpuusage = 0; | |
147 | int i; | |
148 | ||
149 | for_each_present_cpu(i) | |
150 | totalcpuusage += cpuacct_cpuusage_read(ca, i); | |
151 | ||
152 | return totalcpuusage; | |
153 | } | |
154 | ||
155 | static int cpuusage_write(struct cgroup *cgrp, struct cftype *cftype, | |
156 | u64 reset) | |
157 | { | |
158 | struct cpuacct *ca = cgroup_ca(cgrp); | |
159 | int err = 0; | |
160 | int i; | |
161 | ||
162 | if (reset) { | |
163 | err = -EINVAL; | |
164 | goto out; | |
165 | } | |
166 | ||
167 | for_each_present_cpu(i) | |
168 | cpuacct_cpuusage_write(ca, i, 0); | |
169 | ||
170 | out: | |
171 | return err; | |
172 | } | |
173 | ||
174 | static int cpuacct_percpu_seq_read(struct cgroup *cgroup, struct cftype *cft, | |
175 | struct seq_file *m) | |
176 | { | |
177 | struct cpuacct *ca = cgroup_ca(cgroup); | |
178 | u64 percpu; | |
179 | int i; | |
180 | ||
181 | for_each_present_cpu(i) { | |
182 | percpu = cpuacct_cpuusage_read(ca, i); | |
183 | seq_printf(m, "%llu ", (unsigned long long) percpu); | |
184 | } | |
185 | seq_printf(m, "\n"); | |
186 | return 0; | |
187 | } | |
188 | ||
189 | static const char * const cpuacct_stat_desc[] = { | |
190 | [CPUACCT_STAT_USER] = "user", | |
191 | [CPUACCT_STAT_SYSTEM] = "system", | |
192 | }; | |
193 | ||
194 | static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft, | |
195 | struct cgroup_map_cb *cb) | |
196 | { | |
197 | struct cpuacct *ca = cgroup_ca(cgrp); | |
198 | int cpu; | |
199 | s64 val = 0; | |
200 | ||
201 | for_each_online_cpu(cpu) { | |
202 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
203 | val += kcpustat->cpustat[CPUTIME_USER]; | |
204 | val += kcpustat->cpustat[CPUTIME_NICE]; | |
205 | } | |
206 | val = cputime64_to_clock_t(val); | |
207 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_USER], val); | |
208 | ||
209 | val = 0; | |
210 | for_each_online_cpu(cpu) { | |
211 | struct kernel_cpustat *kcpustat = per_cpu_ptr(ca->cpustat, cpu); | |
212 | val += kcpustat->cpustat[CPUTIME_SYSTEM]; | |
213 | val += kcpustat->cpustat[CPUTIME_IRQ]; | |
214 | val += kcpustat->cpustat[CPUTIME_SOFTIRQ]; | |
215 | } | |
216 | ||
217 | val = cputime64_to_clock_t(val); | |
218 | cb->fill(cb, cpuacct_stat_desc[CPUACCT_STAT_SYSTEM], val); | |
219 | ||
220 | return 0; | |
221 | } | |
222 | ||
223 | static struct cftype files[] = { | |
224 | { | |
225 | .name = "usage", | |
226 | .read_u64 = cpuusage_read, | |
227 | .write_u64 = cpuusage_write, | |
228 | }, | |
229 | { | |
230 | .name = "usage_percpu", | |
231 | .read_seq_string = cpuacct_percpu_seq_read, | |
232 | }, | |
233 | { | |
234 | .name = "stat", | |
235 | .read_map = cpuacct_stats_show, | |
236 | }, | |
237 | { } /* terminate */ | |
238 | }; | |
239 | ||
240 | /* | |
241 | * charge this task's execution time to its accounting group. | |
242 | * | |
243 | * called with rq->lock held. | |
244 | */ | |
245 | void cpuacct_charge(struct task_struct *tsk, u64 cputime) | |
246 | { | |
247 | struct cpuacct *ca; | |
248 | int cpu; | |
249 | ||
250 | if (unlikely(!cpuacct_subsys.active)) | |
251 | return; | |
252 | ||
253 | cpu = task_cpu(tsk); | |
254 | ||
255 | rcu_read_lock(); | |
256 | ||
257 | ca = task_ca(tsk); | |
258 | ||
543bc0e7 | 259 | while (true) { |
2e76c24d LZ |
260 | u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); |
261 | *cpuusage += cputime; | |
543bc0e7 LZ |
262 | |
263 | ca = parent_ca(ca); | |
264 | if (!ca) | |
265 | break; | |
2e76c24d LZ |
266 | } |
267 | ||
268 | rcu_read_unlock(); | |
269 | } | |
270 | ||
1966aaf7 LZ |
271 | /* |
272 | * Add user/system time to cpuacct. | |
273 | * | |
274 | * Note: it's the caller that updates the account of the root cgroup. | |
275 | */ | |
276 | void cpuacct_account_field(struct task_struct *p, int index, u64 val) | |
277 | { | |
278 | struct kernel_cpustat *kcpustat; | |
279 | struct cpuacct *ca; | |
280 | ||
281 | if (unlikely(!cpuacct_subsys.active)) | |
282 | return; | |
283 | ||
284 | rcu_read_lock(); | |
285 | ca = task_ca(p); | |
5f40d804 | 286 | while (ca != &root_cpuacct) { |
1966aaf7 LZ |
287 | kcpustat = this_cpu_ptr(ca->cpustat); |
288 | kcpustat->cpustat[index] += val; | |
5f40d804 | 289 | ca = __parent_ca(ca); |
1966aaf7 LZ |
290 | } |
291 | rcu_read_unlock(); | |
292 | } | |
293 | ||
2e76c24d LZ |
294 | struct cgroup_subsys cpuacct_subsys = { |
295 | .name = "cpuacct", | |
296 | .css_alloc = cpuacct_css_alloc, | |
297 | .css_free = cpuacct_css_free, | |
298 | .subsys_id = cpuacct_subsys_id, | |
299 | .base_cftypes = files, | |
300 | }; |