]>
Commit | Line | Data |
---|---|---|
73fbec60 FW |
1 | #include <linux/export.h> |
2 | #include <linux/sched.h> | |
3 | #include <linux/tsacct_kern.h> | |
4 | #include <linux/kernel_stat.h> | |
5 | #include <linux/static_key.h> | |
6 | #include "sched.h" | |
7 | ||
8 | ||
9 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | |
10 | ||
/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time, which can
 * race with irq/vtime_account on this CPU. We would either get the old
 * or the new value, with the side effect of accounting a slice of irq time
 * to the wrong task when an irq is in progress while we read rq->clock.
 * That is a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
22 | DEFINE_PER_CPU(u64, cpu_hardirq_time); | |
23 | DEFINE_PER_CPU(u64, cpu_softirq_time); | |
24 | ||
25 | static DEFINE_PER_CPU(u64, irq_start_time); | |
26 | static int sched_clock_irqtime; | |
27 | ||
28 | void enable_sched_clock_irqtime(void) | |
29 | { | |
30 | sched_clock_irqtime = 1; | |
31 | } | |
32 | ||
33 | void disable_sched_clock_irqtime(void) | |
34 | { | |
35 | sched_clock_irqtime = 0; | |
36 | } | |
37 | ||
#ifndef CONFIG_64BIT
/*
 * Per-CPU sequence counter, only defined when CONFIG_64BIT is not set.
 * NOTE(review): presumably what irq_time_write_begin()/irq_time_write_end()
 * operate on so readers can detect torn 64-bit reads - confirm in sched.h.
 */
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
41 | ||
42 | /* | |
43 | * Called before incrementing preempt_count on {soft,}irq_enter | |
44 | * and before decrementing preempt_count on {soft,}irq_exit. | |
45 | */ | |
bf9fae9f | 46 | void vtime_account(struct task_struct *curr) |
73fbec60 FW |
47 | { |
48 | unsigned long flags; | |
49 | s64 delta; | |
50 | int cpu; | |
51 | ||
52 | if (!sched_clock_irqtime) | |
53 | return; | |
54 | ||
55 | local_irq_save(flags); | |
56 | ||
57 | cpu = smp_processor_id(); | |
58 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | |
59 | __this_cpu_add(irq_start_time, delta); | |
60 | ||
61 | irq_time_write_begin(); | |
62 | /* | |
63 | * We do not account for softirq time from ksoftirqd here. | |
64 | * We want to continue accounting softirq time to ksoftirqd thread | |
65 | * in that case, so as not to confuse scheduler with a special task | |
66 | * that do not consume any time, but still wants to run. | |
67 | */ | |
68 | if (hardirq_count()) | |
69 | __this_cpu_add(cpu_hardirq_time, delta); | |
70 | else if (in_serving_softirq() && curr != this_cpu_ksoftirqd()) | |
71 | __this_cpu_add(cpu_softirq_time, delta); | |
72 | ||
73 | irq_time_write_end(); | |
74 | local_irq_restore(flags); | |
75 | } | |
bf9fae9f | 76 | EXPORT_SYMBOL_GPL(vtime_account); |
73fbec60 FW |
77 | |
78 | static int irqtime_account_hi_update(void) | |
79 | { | |
80 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
81 | unsigned long flags; | |
82 | u64 latest_ns; | |
83 | int ret = 0; | |
84 | ||
85 | local_irq_save(flags); | |
86 | latest_ns = this_cpu_read(cpu_hardirq_time); | |
87 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) | |
88 | ret = 1; | |
89 | local_irq_restore(flags); | |
90 | return ret; | |
91 | } | |
92 | ||
93 | static int irqtime_account_si_update(void) | |
94 | { | |
95 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
96 | unsigned long flags; | |
97 | u64 latest_ns; | |
98 | int ret = 0; | |
99 | ||
100 | local_irq_save(flags); | |
101 | latest_ns = this_cpu_read(cpu_softirq_time); | |
102 | if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) | |
103 | ret = 1; | |
104 | local_irq_restore(flags); | |
105 | return ret; | |
106 | } | |
107 | ||
108 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
109 | ||
/* Without CONFIG_IRQ_TIME_ACCOUNTING the switch is a constant zero. */
#define sched_clock_irqtime	(0)
111 | ||
112 | #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ | |
113 | ||
114 | static inline void task_group_account_field(struct task_struct *p, int index, | |
115 | u64 tmp) | |
116 | { | |
117 | #ifdef CONFIG_CGROUP_CPUACCT | |
118 | struct kernel_cpustat *kcpustat; | |
119 | struct cpuacct *ca; | |
120 | #endif | |
121 | /* | |
122 | * Since all updates are sure to touch the root cgroup, we | |
123 | * get ourselves ahead and touch it first. If the root cgroup | |
124 | * is the only cgroup, then nothing else should be necessary. | |
125 | * | |
126 | */ | |
127 | __get_cpu_var(kernel_cpustat).cpustat[index] += tmp; | |
128 | ||
129 | #ifdef CONFIG_CGROUP_CPUACCT | |
130 | if (unlikely(!cpuacct_subsys.active)) | |
131 | return; | |
132 | ||
133 | rcu_read_lock(); | |
134 | ca = task_ca(p); | |
135 | while (ca && (ca != &root_cpuacct)) { | |
136 | kcpustat = this_cpu_ptr(ca->cpustat); | |
137 | kcpustat->cpustat[index] += tmp; | |
138 | ca = parent_ca(ca); | |
139 | } | |
140 | rcu_read_unlock(); | |
141 | #endif | |
142 | } | |
143 | ||
144 | /* | |
145 | * Account user cpu time to a process. | |
146 | * @p: the process that the cpu time gets accounted to | |
147 | * @cputime: the cpu time spent in user space since the last update | |
148 | * @cputime_scaled: cputime scaled by cpu frequency | |
149 | */ | |
150 | void account_user_time(struct task_struct *p, cputime_t cputime, | |
151 | cputime_t cputime_scaled) | |
152 | { | |
153 | int index; | |
154 | ||
155 | /* Add user time to process. */ | |
156 | p->utime += cputime; | |
157 | p->utimescaled += cputime_scaled; | |
158 | account_group_user_time(p, cputime); | |
159 | ||
160 | index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; | |
161 | ||
162 | /* Add user time to cpustat. */ | |
163 | task_group_account_field(p, index, (__force u64) cputime); | |
164 | ||
165 | /* Account for user time used */ | |
166 | acct_update_integrals(p); | |
167 | } | |
168 | ||
169 | /* | |
170 | * Account guest cpu time to a process. | |
171 | * @p: the process that the cpu time gets accounted to | |
172 | * @cputime: the cpu time spent in virtual machine since the last update | |
173 | * @cputime_scaled: cputime scaled by cpu frequency | |
174 | */ | |
175 | static void account_guest_time(struct task_struct *p, cputime_t cputime, | |
176 | cputime_t cputime_scaled) | |
177 | { | |
178 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
179 | ||
180 | /* Add guest time to process. */ | |
181 | p->utime += cputime; | |
182 | p->utimescaled += cputime_scaled; | |
183 | account_group_user_time(p, cputime); | |
184 | p->gtime += cputime; | |
185 | ||
186 | /* Add guest time to cpustat. */ | |
187 | if (TASK_NICE(p) > 0) { | |
188 | cpustat[CPUTIME_NICE] += (__force u64) cputime; | |
189 | cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime; | |
190 | } else { | |
191 | cpustat[CPUTIME_USER] += (__force u64) cputime; | |
192 | cpustat[CPUTIME_GUEST] += (__force u64) cputime; | |
193 | } | |
194 | } | |
195 | ||
196 | /* | |
197 | * Account system cpu time to a process and desired cpustat field | |
198 | * @p: the process that the cpu time gets accounted to | |
199 | * @cputime: the cpu time spent in kernel space since the last update | |
200 | * @cputime_scaled: cputime scaled by cpu frequency | |
201 | * @target_cputime64: pointer to cpustat field that has to be updated | |
202 | */ | |
203 | static inline | |
204 | void __account_system_time(struct task_struct *p, cputime_t cputime, | |
205 | cputime_t cputime_scaled, int index) | |
206 | { | |
207 | /* Add system time to process. */ | |
208 | p->stime += cputime; | |
209 | p->stimescaled += cputime_scaled; | |
210 | account_group_system_time(p, cputime); | |
211 | ||
212 | /* Add system time to cpustat. */ | |
213 | task_group_account_field(p, index, (__force u64) cputime); | |
214 | ||
215 | /* Account for system time used */ | |
216 | acct_update_integrals(p); | |
217 | } | |
218 | ||
219 | /* | |
220 | * Account system cpu time to a process. | |
221 | * @p: the process that the cpu time gets accounted to | |
222 | * @hardirq_offset: the offset to subtract from hardirq_count() | |
223 | * @cputime: the cpu time spent in kernel space since the last update | |
224 | * @cputime_scaled: cputime scaled by cpu frequency | |
225 | */ | |
226 | void account_system_time(struct task_struct *p, int hardirq_offset, | |
227 | cputime_t cputime, cputime_t cputime_scaled) | |
228 | { | |
229 | int index; | |
230 | ||
231 | if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) { | |
232 | account_guest_time(p, cputime, cputime_scaled); | |
233 | return; | |
234 | } | |
235 | ||
236 | if (hardirq_count() - hardirq_offset) | |
237 | index = CPUTIME_IRQ; | |
238 | else if (in_serving_softirq()) | |
239 | index = CPUTIME_SOFTIRQ; | |
240 | else | |
241 | index = CPUTIME_SYSTEM; | |
242 | ||
243 | __account_system_time(p, cputime, cputime_scaled, index); | |
244 | } | |
245 | ||
246 | /* | |
247 | * Account for involuntary wait time. | |
248 | * @cputime: the cpu time spent in involuntary wait | |
249 | */ | |
250 | void account_steal_time(cputime_t cputime) | |
251 | { | |
252 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
253 | ||
254 | cpustat[CPUTIME_STEAL] += (__force u64) cputime; | |
255 | } | |
256 | ||
257 | /* | |
258 | * Account for idle time. | |
259 | * @cputime: the cpu time spent in idle wait | |
260 | */ | |
261 | void account_idle_time(cputime_t cputime) | |
262 | { | |
263 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
264 | struct rq *rq = this_rq(); | |
265 | ||
266 | if (atomic_read(&rq->nr_iowait) > 0) | |
267 | cpustat[CPUTIME_IOWAIT] += (__force u64) cputime; | |
268 | else | |
269 | cpustat[CPUTIME_IDLE] += (__force u64) cputime; | |
270 | } | |
271 | ||
272 | static __always_inline bool steal_account_process_tick(void) | |
273 | { | |
274 | #ifdef CONFIG_PARAVIRT | |
275 | if (static_key_false(¶virt_steal_enabled)) { | |
276 | u64 steal, st = 0; | |
277 | ||
278 | steal = paravirt_steal_clock(smp_processor_id()); | |
279 | steal -= this_rq()->prev_steal_time; | |
280 | ||
281 | st = steal_ticks(steal); | |
282 | this_rq()->prev_steal_time += st * TICK_NSEC; | |
283 | ||
284 | account_steal_time(st); | |
285 | return st; | |
286 | } | |
287 | #endif | |
288 | return false; | |
289 | } | |
290 | ||
291 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | |
292 | ||
293 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | |
294 | /* | |
295 | * Account a tick to a process and cpustat | |
296 | * @p: the process that the cpu time gets accounted to | |
297 | * @user_tick: is the tick from userspace | |
298 | * @rq: the pointer to rq | |
299 | * | |
300 | * Tick demultiplexing follows the order | |
301 | * - pending hardirq update | |
302 | * - pending softirq update | |
303 | * - user_time | |
304 | * - idle_time | |
305 | * - system time | |
306 | * - check for guest_time | |
307 | * - else account as system_time | |
308 | * | |
309 | * Check for hardirq is done both for system and user time as there is | |
310 | * no timer going off while we are on hardirq and hence we may never get an | |
311 | * opportunity to update it solely in system time. | |
312 | * p->stime and friends are only updated on system time and not on irq | |
313 | * softirq as those do not count in task exec_runtime any more. | |
314 | */ | |
315 | static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |
316 | struct rq *rq) | |
317 | { | |
318 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | |
319 | u64 *cpustat = kcpustat_this_cpu->cpustat; | |
320 | ||
321 | if (steal_account_process_tick()) | |
322 | return; | |
323 | ||
324 | if (irqtime_account_hi_update()) { | |
325 | cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy; | |
326 | } else if (irqtime_account_si_update()) { | |
327 | cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy; | |
328 | } else if (this_cpu_ksoftirqd() == p) { | |
329 | /* | |
330 | * ksoftirqd time do not get accounted in cpu_softirq_time. | |
331 | * So, we have to handle it separately here. | |
332 | * Also, p->stime needs to be updated for ksoftirqd. | |
333 | */ | |
334 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | |
335 | CPUTIME_SOFTIRQ); | |
336 | } else if (user_tick) { | |
337 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | |
338 | } else if (p == rq->idle) { | |
339 | account_idle_time(cputime_one_jiffy); | |
340 | } else if (p->flags & PF_VCPU) { /* System time or guest time */ | |
341 | account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled); | |
342 | } else { | |
343 | __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled, | |
344 | CPUTIME_SYSTEM); | |
345 | } | |
346 | } | |
347 | ||
348 | static void irqtime_account_idle_ticks(int ticks) | |
349 | { | |
350 | int i; | |
351 | struct rq *rq = this_rq(); | |
352 | ||
353 | for (i = 0; i < ticks; i++) | |
354 | irqtime_account_process_tick(current, 0, rq); | |
355 | } | |
356 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
/* Irq time accounting is compiled out: both helpers are empty stubs. */
static void irqtime_account_idle_ticks(int ticks)
{
}

static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq)
{
}
360 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | |
361 | ||
362 | /* | |
363 | * Account a single tick of cpu time. | |
364 | * @p: the process that the cpu time gets accounted to | |
365 | * @user_tick: indicates if the tick is a user or a system tick | |
366 | */ | |
367 | void account_process_tick(struct task_struct *p, int user_tick) | |
368 | { | |
369 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); | |
370 | struct rq *rq = this_rq(); | |
371 | ||
372 | if (sched_clock_irqtime) { | |
373 | irqtime_account_process_tick(p, user_tick, rq); | |
374 | return; | |
375 | } | |
376 | ||
377 | if (steal_account_process_tick()) | |
378 | return; | |
379 | ||
380 | if (user_tick) | |
381 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | |
382 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | |
383 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, | |
384 | one_jiffy_scaled); | |
385 | else | |
386 | account_idle_time(cputime_one_jiffy); | |
387 | } | |
388 | ||
389 | /* | |
390 | * Account multiple ticks of steal time. | |
391 | * @p: the process from which the cpu time has been stolen | |
392 | * @ticks: number of stolen ticks | |
393 | */ | |
394 | void account_steal_ticks(unsigned long ticks) | |
395 | { | |
396 | account_steal_time(jiffies_to_cputime(ticks)); | |
397 | } | |
398 | ||
399 | /* | |
400 | * Account multiple ticks of idle time. | |
401 | * @ticks: number of stolen ticks | |
402 | */ | |
403 | void account_idle_ticks(unsigned long ticks) | |
404 | { | |
405 | ||
406 | if (sched_clock_irqtime) { | |
407 | irqtime_account_idle_ticks(ticks); | |
408 | return; | |
409 | } | |
410 | ||
411 | account_idle_time(jiffies_to_cputime(ticks)); | |
412 | } | |
413 | ||
414 | #endif | |
415 | ||
416 | /* | |
417 | * Use precise platform statistics if available: | |
418 | */ | |
419 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | |
420 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |
421 | { | |
422 | *ut = p->utime; | |
423 | *st = p->stime; | |
424 | } | |
425 | ||
426 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |
427 | { | |
428 | struct task_cputime cputime; | |
429 | ||
430 | thread_group_cputime(p, &cputime); | |
431 | ||
432 | *ut = cputime.utime; | |
433 | *st = cputime.stime; | |
434 | } | |
a7e1a9e3 FW |
435 | |
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have other meaning of the idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
	unsigned long irqflags;

	local_irq_save(irqflags);

	/* Only the idle task outside of interrupt context counts as idle. */
	if (!in_interrupt() && is_idle_task(tsk))
		vtime_account_idle(tsk);
	else
		vtime_account_system(tsk);

	local_irq_restore(irqflags);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
460 | ||
73fbec60 FW |
461 | #else |
462 | ||
/* Fall back to nsecs_to_jiffies() unless nsecs_to_cputime is already defined. */
#ifndef nsecs_to_cputime
# define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
#endif
466 | ||
467 | static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) | |
468 | { | |
469 | u64 temp = (__force u64) rtime; | |
470 | ||
471 | temp *= (__force u64) utime; | |
472 | ||
473 | if (sizeof(cputime_t) == 4) | |
474 | temp = div_u64(temp, (__force u32) total); | |
475 | else | |
476 | temp = div64_u64(temp, (__force u64) total); | |
477 | ||
478 | return (__force cputime_t) temp; | |
479 | } | |
480 | ||
481 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |
482 | { | |
483 | cputime_t rtime, utime = p->utime, total = utime + p->stime; | |
484 | ||
485 | /* | |
486 | * Use CFS's precise accounting: | |
487 | */ | |
488 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); | |
489 | ||
490 | if (total) | |
491 | utime = scale_utime(utime, rtime, total); | |
492 | else | |
493 | utime = rtime; | |
494 | ||
495 | /* | |
496 | * Compare with previous values, to keep monotonicity: | |
497 | */ | |
498 | p->prev_utime = max(p->prev_utime, utime); | |
499 | p->prev_stime = max(p->prev_stime, rtime - p->prev_utime); | |
500 | ||
501 | *ut = p->prev_utime; | |
502 | *st = p->prev_stime; | |
503 | } | |
504 | ||
505 | /* | |
506 | * Must be called with siglock held. | |
507 | */ | |
508 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |
509 | { | |
510 | struct signal_struct *sig = p->signal; | |
511 | struct task_cputime cputime; | |
512 | cputime_t rtime, utime, total; | |
513 | ||
514 | thread_group_cputime(p, &cputime); | |
515 | ||
516 | total = cputime.utime + cputime.stime; | |
517 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); | |
518 | ||
519 | if (total) | |
520 | utime = scale_utime(cputime.utime, rtime, total); | |
521 | else | |
522 | utime = rtime; | |
523 | ||
524 | sig->prev_utime = max(sig->prev_utime, utime); | |
525 | sig->prev_stime = max(sig->prev_stime, rtime - sig->prev_utime); | |
526 | ||
527 | *ut = sig->prev_utime; | |
528 | *st = sig->prev_stime; | |
529 | } | |
530 | #endif |