#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and
 * racing with irq/vtime_account on this CPU. We would either get the
 * old or the new value, with the side effect of accounting a slice of
 * irq time to the wrong task when an irq is in progress while we read
 * rq->clock. That is a worthy compromise in place of having locks on
 * each irq in account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

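/*
 * sched_clock() based irq time accounting is opt-in: architecture code
 * turns it on once it knows its sched_clock() is fast and stable enough
 * to be sampled on every irq entry/exit (x86, for instance, enables it
 * from its TSC setup code).
 */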
void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
        struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
        irqtime->irq_start_time += delta;

        u64_stats_update_begin(&irqtime->sync);
        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd
         * thread in that case, so as not to confuse the scheduler with
         * a special task that does not consume any time, but still
         * wants to run.
         */
        if (hardirq_count())
                irqtime->hardirq_time += delta;
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                irqtime->softirq_time += delta;

        u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        cputime_t irq_cputime;

        irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
        irq_cputime = min(irq_cputime, maxtime);
        cpustat[idx] += irq_cputime;

        return irq_cputime;
}

static cputime_t irqtime_account_hi_update(cputime_t maxtime)
{
        return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
                                      CPUTIME_IRQ, maxtime);
}

static cputime_t irqtime_account_si_update(cputime_t maxtime)
{
        return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
                                      CPUTIME_SOFTIRQ, maxtime);
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

static cputime_t irqtime_account_hi_update(cputime_t dummy)
{
        return 0;
}

static cputime_t irqtime_account_si_update(cputime_t dummy)
{
        return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __this_cpu_add(kernel_cpustat.cpustat[index], tmp);

        cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, cputime_t cputime)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);

        index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (task_nice(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                cputime_t steal_cputime;
                u64 steal;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                steal_cputime = min(nsecs_to_cputime(steal), maxtime);
                account_steal_time(steal_cputime);
                this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

                return steal_cputime;
        }
#endif
        return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline cputime_t account_other_time(cputime_t max)
{
        cputime_t accounted;

        /* Shall be converted to a lockdep-enabled lightweight check */
        WARN_ON_ONCE(!irqs_disabled());

        accounted = steal_account_process_time(max);

        if (accounted < max)
                accounted += irqtime_account_hi_update(max - accounted);

        if (accounted < max)
                accounted += irqtime_account_si_update(max - accounted);

        return accounted;
}

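/*
 * sum_exec_runtime is a 64-bit value. On 64-bit it can be read in a
 * single atomic load, but on 32-bit the read could tear against a
 * concurrent update, so take the rq lock there to get a consistent
 * value.
 */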
#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
        return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
        u64 ns;
        struct rq_flags rf;
        struct rq *rq;

        rq = task_rq_lock(t, &rf);
        ns = t->se.sum_exec_runtime;
        task_rq_unlock(rq, t, &rf);

        return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        cputime_t utime, stime;
        struct task_struct *t;
        unsigned int seq, nextseq;
        unsigned long flags;

        /*
         * Update current task runtime to account pending time since last
         * scheduler action or thread_group_cputime() call. This thread group
         * might have other running tasks on different CPUs, but updating
         * their runtime can affect syscall performance, so we skip accounting
         * those pending times and rely only on values updated on tick or
         * other scheduler action.
         */
        if (same_thread_group(current, tsk))
                (void) task_sched_runtime(current);

        rcu_read_lock();
        /* Attempt a lockless read on the first round. */
        nextseq = 0;
        do {
                seq = nextseq;
                flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
                times->utime = sig->utime;
                times->stime = sig->stime;
                times->sum_exec_runtime = sig->sum_sched_runtime;

                for_each_thread(tsk, t) {
                        task_cputime(t, &utime, &stime);
                        times->utime += utime;
                        times->stime += stime;
                        times->sum_exec_runtime += read_sum_exec_runtime(t);
                }
                /* If lockless access failed, take the lock. */
                nextseq = 1;
        } while (need_seqretry(&sig->stats_lock, seq));
        done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
        rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 * @ticks: number of ticks to account
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there
 * is no timer going off while we are in a hardirq and hence we may never
 * get an opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on
 * irq/softirq as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq, int ticks)
{
        u64 cputime = (__force u64) cputime_one_jiffy * ticks;
        cputime_t other;

        /*
         * When returning from idle, many ticks can get accounted at
         * once, including some ticks of steal, irq, and softirq time.
         * Subtract those ticks from the amount of time accounted to
         * idle, or potentially user or system time. Due to rounding,
         * other time can exceed ticks occasionally.
         */
        other = account_other_time(ULONG_MAX);
        if (other >= cputime)
                return;
        cputime -= other;

        if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time.
                 * So, we have to handle it separately here.
                 * Also, p->stime needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime, CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime);
        } else if (p == rq->idle) {
                account_idle_time(cputime);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime);
        } else {
                __account_system_time(p, cputime, CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        struct rq *rq = this_rq();

        irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        vtime_account_user(prev);
#endif
        arch_vtime_task_switch(prev);
}
#endif

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * assign a different meaning to idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
        if (!in_interrupt() && is_idle_task(tsk))
                vtime_account_idle(tsk);
        else
                vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t cputime, steal;
        struct rq *rq = this_rq();

        if (vtime_accounting_cpu_enabled())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq, 1);
                return;
        }

        cputime = cputime_one_jiffy;
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;

        if (user_tick)
                account_user_time(p, cputime);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime);
        else
                account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of ticks to account as idle
 */
void account_idle_ticks(unsigned long ticks)
{
        cputime_t cputime, steal;

        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        cputime = jiffies_to_cputime(ticks);
        steal = steal_account_process_time(ULONG_MAX);

        if (steal >= cputime)
                return;

        cputime -= steal;
        account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow
 * by losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
        u64 scaled;

        for (;;) {
                /* Make sure "rtime" is the bigger of stime/rtime */
                if (stime > rtime)
                        swap(rtime, stime);

                /* Make sure 'total' fits in 32 bits */
                if (total >> 32)
                        goto drop_precision;

                /* Does rtime (and thus stime) fit in 32 bits? */
                if (!(rtime >> 32))
                        break;

                /* Can we just balance rtime/stime rather than dropping bits? */
                if (stime >> 31)
                        goto drop_precision;

                /* We can grow stime and shrink rtime and try to make them both fit */
                stime <<= 1;
                rtime >>= 1;
                continue;

drop_precision:
                /* We drop from rtime, it has more bits than stime */
                rtime >>= 1;
                total >>= 1;
        }

        /*
         * Make sure gcc understands that this is a 32x32->64 multiply,
         * followed by a 64/32->64 divide.
         */
        scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
        return (__force cputime_t) scaled;
}
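/*
 * Example: with stime = 100 and utime = 300 ticks (total = 400) but an
 * rtime of 1000 units observed by the scheduler, scale_stime() returns
 * 100 * 1000 / 400 = 250, so the caller ends up with stime = 250 and
 * utime = rtime - stime = 750, preserving the observed 1:3 ratio.
 */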

/*
 * Adjust tick-based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick-based cputime accounting depends on whether a task's random
 * scheduling timeslices happen to be interrupted by the timer or not.
 * Depending on these circumstances, the number of these interrupts may
 * be over- or under-estimated, matching the real user and system cputime
 * with a variable precision.
 *
 * Fix this by scaling these tick-based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
static void cputime_adjust(struct task_cputime *curr,
                           struct prev_cputime *prev,
                           cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, stime, utime;
        unsigned long flags;

        /* Serialize concurrent callers such that we can honour our guarantees */
        raw_spin_lock_irqsave(&prev->lock, flags);
        rtime = nsecs_to_cputime(curr->sum_exec_runtime);

        /*
         * This is possible under two circumstances:
         *  - rtime isn't monotonic after all (a bug);
         *  - we got reordered by the lock.
         *
         * In both cases this acts as a filter such that the rest of the code
         * can assume it is monotonic regardless of anything else.
         */
        if (prev->stime + prev->utime >= rtime)
                goto out;

        stime = curr->stime;
        utime = curr->utime;

        /*
         * If either stime or both stime and utime are 0, assume all runtime
         * is userspace. Once a task gets some ticks, the monotonicity code
         * at 'update:' will ensure things converge to the observed ratio.
         */
        if (stime == 0) {
                utime = rtime;
                goto update;
        }

        if (utime == 0) {
                stime = rtime;
                goto update;
        }

        stime = scale_stime((__force u64)stime, (__force u64)rtime,
                            (__force u64)(stime + utime));

update:
        /*
         * Make sure stime doesn't go backwards; this preserves monotonicity
         * for utime because rtime is monotonic.
         *
         *  utime_i+1 = rtime_i+1 - stime_i
         *            = rtime_i+1 - (rtime_i - utime_i)
         *            = (rtime_i+1 - rtime_i) + utime_i
         *            >= utime_i
         */
        if (stime < prev->stime)
                stime = prev->stime;
        utime = rtime - stime;

        /*
         * Make sure utime doesn't go backwards; this still preserves
         * monotonicity for stime, analogous argument to above.
         */
        if (utime < prev->utime) {
                utime = prev->utime;
                stime = rtime - utime;
        }

        prev->stime = stime;
        prev->utime = utime;
out:
        *ut = prev->utime;
        *st = prev->stime;
        raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        task_cputime(p, &cputime.utime, &cputime.stime);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
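/*
 * Pending cputime, at jiffies granularity, accumulated since the last
 * vtime_snap update. Returns 0 when the snapshot appears to lie in the
 * future, which can happen due to a racy read of jiffies.
 */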
static cputime_t vtime_delta(struct task_struct *tsk)
{
        unsigned long now = READ_ONCE(jiffies);

        if (time_before(now, (unsigned long)tsk->vtime_snap))
                return 0;

        return jiffies_to_cputime(now - tsk->vtime_snap);
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
        unsigned long now = READ_ONCE(jiffies);
        cputime_t delta, other;

        /*
         * Unlike tick-based timing, vtime-based timing never has lost
         * ticks, so there is no need for steal time accounting to make
         * up for lost ticks. Vtime accounts a rounded version of actual
         * elapsed time. Limit account_other_time to prevent rounding
         * errors from causing elapsed vtime to go negative.
         */
        delta = jiffies_to_cputime(now - tsk->vtime_snap);
        other = account_other_time(delta);
        WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
        tsk->vtime_snap = now;

        return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk)
{
        cputime_t delta_cpu = get_vtime_delta(tsk);

        account_system_time(tsk, irq_count(), delta_cpu);
}

void vtime_account_system(struct task_struct *tsk)
{
        if (!vtime_delta(tsk))
                return;

        write_seqcount_begin(&tsk->vtime_seqcount);
        __vtime_account_system(tsk);
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_account_user(struct task_struct *tsk)
{
        cputime_t delta_cpu;

        write_seqcount_begin(&tsk->vtime_seqcount);
        tsk->vtime_snap_whence = VTIME_SYS;
        if (vtime_delta(tsk)) {
                delta_cpu = get_vtime_delta(tsk);
                account_user_time(tsk, delta_cpu);
        }
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
        write_seqcount_begin(&tsk->vtime_seqcount);
        if (vtime_delta(tsk))
                __vtime_account_system(tsk);
        tsk->vtime_snap_whence = VTIME_USER;
        write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
        /*
         * The flags must be updated under the lock, together with
         * the vtime_snap flush and update. That enforces the right
         * ordering and update-sequence synchronization against the
         * reader (task_gtime()), which can thus safely catch up with
         * a tickless delta.
         */
        write_seqcount_begin(&tsk->vtime_seqcount);
        if (vtime_delta(tsk))
                __vtime_account_system(tsk);
        current->flags |= PF_VCPU;
        write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
        write_seqcount_begin(&tsk->vtime_seqcount);
        __vtime_account_system(tsk);
        current->flags &= ~PF_VCPU;
        write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
        cputime_t delta_cpu = get_vtime_delta(tsk);

        account_idle_time(delta_cpu);
}

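/*
 * The pending time of @prev was already flushed by
 * vtime_common_task_switch(); here we only mark its vtime accounting
 * inactive and start a fresh jiffies snapshot for the incoming task.
 */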
void arch_vtime_task_switch(struct task_struct *prev)
{
        write_seqcount_begin(&prev->vtime_seqcount);
        prev->vtime_snap_whence = VTIME_INACTIVE;
        write_seqcount_end(&prev->vtime_seqcount);

        write_seqcount_begin(&current->vtime_seqcount);
        current->vtime_snap_whence = VTIME_SYS;
        current->vtime_snap = jiffies;
        write_seqcount_end(&current->vtime_seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
        unsigned long flags;

        local_irq_save(flags);
        write_seqcount_begin(&t->vtime_seqcount);
        t->vtime_snap_whence = VTIME_SYS;
        t->vtime_snap = jiffies;
        write_seqcount_end(&t->vtime_seqcount);
        local_irq_restore(flags);
}

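/*
 * Return t->gtime plus any pending nohz guest time, sampled under the
 * vtime seqcount so the reader sees a consistent pair of values.
 */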
cputime_t task_gtime(struct task_struct *t)
{
        unsigned int seq;
        cputime_t gtime;

        if (!vtime_accounting_enabled())
                return t->gtime;

        do {
                seq = read_seqcount_begin(&t->vtime_seqcount);

                gtime = t->gtime;
                if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
                        gtime += vtime_delta(t);

        } while (read_seqcount_retry(&t->vtime_seqcount, seq));

        return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
        cputime_t delta;
        unsigned int seq;

        if (!vtime_accounting_enabled()) {
                *utime = t->utime;
                *stime = t->stime;
                return;
        }

        do {
                seq = read_seqcount_begin(&t->vtime_seqcount);

                *utime = t->utime;
                *stime = t->stime;

                /* Task is sleeping, nothing to add */
                if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t))
                        continue;

                delta = vtime_delta(t);

                /*
                 * Task runs either in user or kernel space, add pending nohz
                 * time to the right place.
                 */
                if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU)
                        *utime += delta;
                else if (t->vtime_snap_whence == VTIME_SYS)
                        *stime += delta;
        } while (read_seqcount_retry(&t->vtime_seqcount, seq));
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */