// SPDX-License-Identifier: GPL-2.0-only
/*
 * Simple CPU accounting cgroup controller
 */
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and
 * racing with irq/vtime_account on this CPU. The reader would then see
 * either the old or the new value, with the side effect of accounting
 * a slice of irq time to the wrong task when an irq is in progress
 * while we read rq->clock. That is a worthy compromise in place of
 * having locks on each irq in account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
				  enum cpu_usage_stat idx)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	u64_stats_update_begin(&irqtime->sync);
	cpustat[idx] += delta;
	irqtime->total += delta;
	irqtime->tick_delta += delta;
	u64_stats_update_end(&irqtime->sync);
}
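
/*
 * The u64_stats seqcount above pairs with lockless readers elsewhere
 * (e.g. the irq_time_read() helper in sched.h). A minimal sketch of
 * that read side, assuming only the fields used in this file:
 *
 *	static u64 irqtime_total_read(int cpu)
 *	{
 *		struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
 *		unsigned int seq;
 *		u64 total;
 *
 *		do {
 *			seq = u64_stats_fetch_begin(&irqtime->sync);
 *			total = irqtime->total;
 *		} while (u64_stats_fetch_retry(&irqtime->sync, seq));
 *
 *		return total;
 *	}
 *
 * On 64-bit kernels the begin/retry pair compiles away; on 32-bit it
 * guards against torn reads of the u64 counters.
 */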

/*
 * Called after incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr, unsigned int offset)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	unsigned int pc;
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
	irqtime->irq_start_time += delta;
	pc = preempt_count() - offset;

	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to the ksoftirqd
	 * thread in that case, so as not to confuse the scheduler with a
	 * special task that does not consume any time, but still wants
	 * to run.
	 */
	if (pc & HARDIRQ_MASK)
		irqtime_account_delta(irqtime, delta, CPUTIME_IRQ);
	else if ((pc & SOFTIRQ_OFFSET) && curr != this_cpu_ksoftirqd())
		irqtime_account_delta(irqtime, delta, CPUTIME_SOFTIRQ);
}
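
/*
 * Example of the demultiplexing above (bit layout per linux/preempt.h):
 * a CPU that is serving a softirq and is then hit by a hardirq has both
 * SOFTIRQ_OFFSET and a HARDIRQ_MASK bit set in preempt_count(), and the
 * hardirq test runs first, so the slice is charged to CPUTIME_IRQ.
 * Testing SOFTIRQ_OFFSET rather than the whole SOFTIRQ_MASK matters:
 * the higher softirq bits only mean local_bh_disable() sections, whose
 * time must stay charged to the task, not to CPUTIME_SOFTIRQ.
 */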

static u64 irqtime_tick_accounted(u64 maxtime)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	u64 delta;

	delta = min(irqtime->tick_delta, maxtime);
	irqtime->tick_delta -= delta;

	return delta;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

static u64 irqtime_tick_accounted(u64 dummy)
{
	return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * get ourselves ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cgroup_account_cputime_field(p, index, tmp);
}

/*
 * Account user CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in user space since the last update
 */
void account_user_time(struct task_struct *p, u64 cputime)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}

/*
 * Account guest CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in a virtual machine since the last update
 */
void account_guest_time(struct task_struct *p, u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += cputime;
		cpustat[CPUTIME_GUEST_NICE] += cputime;
	} else {
		cpustat[CPUTIME_USER] += cputime;
		cpustat[CPUTIME_GUEST] += cputime;
	}
}
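
/*
 * Note that guest time is accounted on top of user time above: p->utime
 * and CPUTIME_USER/NICE are bumped along with gtime and CPUTIME_GUEST*,
 * so in /proc/stat the guest columns are a subset of the user columns
 * rather than an independent bucket.
 */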

/*
 * Account system CPU time to a process and desired cpustat field
 * @p: the process that the CPU time gets accounted to
 * @cputime: the CPU time spent in kernel space since the last update
 * @index: index of the cpustat field that has to be updated
 */
void account_system_index_time(struct task_struct *p,
			       u64 cputime, enum cpu_usage_stat index)
{
	/* Add system time to process. */
	p->stime += cputime;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, cputime);

	/* Account for system time used */
	acct_account_cputime(p);
}

/*
 * Account system CPU time to a process.
 * @p: the process that the CPU time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the CPU time spent in kernel space since the last update
 */
void account_system_time(struct task_struct *p, int hardirq_offset, u64 cputime)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	account_system_index_time(p, cputime, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the CPU time spent in involuntary wait
 */
void account_steal_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += cputime;
}

/*
 * Account for idle time.
 * @cputime: the CPU time spent in idle wait
 */
void account_idle_time(u64 cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += cputime;
	else
		cpustat[CPUTIME_IDLE] += cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline u64 steal_account_process_time(u64 maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;
		steal = min(steal, maxtime);
		account_steal_time(steal);
		this_rq()->prev_steal_time += steal;

		return steal;
	}
#endif
	return 0;
}
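
/*
 * paravirt_steal_clock() returns the cumulative time the hypervisor ran
 * something else on this vCPU; on x86 guests it is typically backed by
 * KVM's per-vCPU steal-time record. prev_steal_time remembers how much
 * has already been folded into cpustat, so only the new delta (clamped
 * to @maxtime) is accounted on each call.
 */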

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline u64 account_other_time(u64 max)
{
	u64 accounted;

	lockdep_assert_irqs_disabled();

	accounted = steal_account_process_time(max);

	if (accounted < max)
		accounted += irqtime_tick_accounted(max - accounted);

	return accounted;
}

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
	return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
	u64 ns;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(t, &rf);
	ns = t->se.sum_exec_runtime;
	task_rq_unlock(rq, t, &rf);

	return ns;
}
#endif
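
/*
 * The split above exists because sum_exec_runtime is a u64 that the
 * scheduler updates concurrently: a 64-bit CPU can load it atomically,
 * but a 32-bit CPU could observe a torn value, so the 32-bit variant
 * samples it under the task's rq lock instead.
 */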

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	u64 utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip
	 * accounting those pending times and rely only on values updated on
	 * tick or other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}
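
/*
 * The retry loop above follows the usual seqbegin-or-lock pattern: the
 * first pass (nextseq == 0) reads optimistically under the seqcount; if
 * a writer raced with us, need_seqretry() sends us around again with
 * nextseq == 1, which makes read_seqbegin_or_lock_irqsave() take
 * sig->stats_lock for real, so the second pass cannot livelock behind a
 * stream of writers.
 */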

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the CPU time gets accounted to
 * @user_tick: is the tick from userspace
 * @ticks: number of ticks to account
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there
 * is no timer going off while we are on hardirq and hence we may never
 * get an opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on
 * irq/softirq time, as those no longer count in task exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 int ticks)
{
	u64 other, cputime = TICK_NSEC * ticks;

	/*
	 * When returning from idle, many ticks can get accounted at
	 * once, including some ticks of steal, irq, and softirq time.
	 * Subtract those ticks from the amount of time accounted to
	 * idle, or potentially user or system time. Due to rounding,
	 * other time can exceed ticks occasionally.
	 */
	other = account_other_time(ULONG_MAX);
	if (other >= cputime)
		return;

	cputime -= other;

	if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		account_system_index_time(p, cputime, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime);
	} else if (p == this_rq()->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime);
	} else {
		account_system_index_time(p, cputime, CPUTIME_SYSTEM);
	}
}

static void irqtime_account_idle_ticks(int ticks)
{
	irqtime_account_process_tick(current, 0, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) { }
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						int nr_ticks) { }
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

# ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_kernel(prev);

	vtime_flush(prev);
	arch_vtime_task_switch(prev);
}
# endif

void vtime_account_irq(struct task_struct *tsk, unsigned int offset)
{
	unsigned int pc = preempt_count() - offset;

	if (pc & HARDIRQ_OFFSET) {
		vtime_account_hardirq(tsk);
	} else if (pc & SOFTIRQ_OFFSET) {
		vtime_account_softirq(tsk);
	} else if (!IS_ENABLED(CONFIG_HAVE_VIRT_CPU_ACCOUNTING_IDLE) &&
		   is_idle_task(tsk)) {
		vtime_account_idle(tsk);
	} else {
		vtime_account_kernel(tsk);
	}
}

void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		    u64 *ut, u64 *st)
{
	*ut = curr->utime;
	*st = curr->stime;
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}

#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE: */

/*
 * Account a single tick of CPU time.
 * @p: the process that the CPU time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	u64 cputime, steal;

	if (vtime_accounting_enabled_this_cpu())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, 1);
		return;
	}

	cputime = TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;

	if (user_tick)
		account_user_time(p, cputime);
	else if ((p != this_rq()->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime);
	else
		account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of stolen ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	u64 cputime, steal;

	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	cputime = ticks * TICK_NSEC;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	account_idle_time(cputime);
}

/*
 * Adjust tick based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on whether a task's random
 * scheduling timeslices happen to be interrupted by the timer or not.
 * Depending on these circumstances, the number of these interrupts may
 * over- or under-estimate the real user and system cputime, matching it
 * with a variable precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
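
/*
 * Worked example (illustrative numbers): suppose the scheduler reports
 * rtime = 10ms of runtime while ticks recorded stime = 2 ticks and
 * utime = 3 ticks. The scaling below yields
 *
 *	stime = 10ms * 2 / (2 + 3) = 4ms,  utime = 10ms - 4ms = 6ms,
 *
 * i.e. the tick-based user/system ratio is preserved, but the total is
 * pinned to the precise runtime the scheduler measured.
 */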
void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev,
		    u64 *ut, u64 *st)
{
	u64 rtime, stime, utime;
	unsigned long flags;

	/* Serialize concurrent callers such that we can honour our guarantees */
	raw_spin_lock_irqsave(&prev->lock, flags);
	rtime = curr->sum_exec_runtime;

	/*
	 * This is possible under two circumstances:
	 *  - rtime isn't monotonic after all (a bug);
	 *  - we got reordered by the lock.
	 *
	 * In both cases this acts as a filter such that the rest of the code
	 * can assume it is monotonic regardless of anything else.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	/*
	 * If either stime or utime is 0, assume all runtime is userspace.
	 * Once a task gets some ticks, the monotonicity code at 'update:'
	 * will ensure things converge to the observed ratio.
	 */
	if (stime == 0) {
		utime = rtime;
		goto update;
	}

	if (utime == 0) {
		stime = rtime;
		goto update;
	}

	stime = mul_u64_u64_div_u64(stime, rtime, stime + utime);

update:
	/*
	 * Make sure stime doesn't go backwards; this preserves monotonicity
	 * for utime because rtime is monotonic.
	 *
	 *  utime_i+1 = rtime_i+1 - stime_i
	 *            = rtime_i+1 - (rtime_i - utime_i)
	 *            = (rtime_i+1 - rtime_i) + utime_i
	 *            >= utime_i
	 */
	if (stime < prev->stime)
		stime = prev->stime;
	utime = rtime - stime;

	/*
	 * Make sure utime doesn't go backwards; this still preserves
	 * monotonicity for stime, analogous argument to above.
	 */
	if (utime < prev->utime) {
		utime = prev->utime;
		stime = rtime - utime;
	}

	prev->stime = stime;
	prev->utime = utime;
out:
	*ut = prev->utime;
	*st = prev->stime;
	raw_spin_unlock_irqrestore(&prev->lock, flags);
}

void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static u64 vtime_delta(struct vtime *vtime)
{
	unsigned long long clock;

	clock = sched_clock();
	if (clock < vtime->starttime)
		return 0;

	return clock - vtime->starttime;
}

static u64 get_vtime_delta(struct vtime *vtime)
{
	u64 delta = vtime_delta(vtime);
	u64 other;

	/*
	 * Unlike tick based timing, vtime based timing never has lost
	 * ticks, and needs no steal time accounting to make up for
	 * lost ticks. Vtime accounts a rounded version of actual
	 * elapsed time. Limit account_other_time to prevent rounding
	 * errors from causing elapsed vtime to go negative.
	 */
	other = account_other_time(delta);
	WARN_ON_ONCE(vtime->state == VTIME_INACTIVE);
	vtime->starttime += delta;

	return delta - other;
}

static void vtime_account_system(struct task_struct *tsk,
				 struct vtime *vtime)
{
	vtime->stime += get_vtime_delta(vtime);
	if (vtime->stime >= TICK_NSEC) {
		account_system_time(tsk, irq_count(), vtime->stime);
		vtime->stime = 0;
	}
}
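
/*
 * The TICK_NSEC threshold above batches the relatively expensive
 * account_*_time() calls: deltas accumulate in the vtime fields and are
 * flushed roughly once per tick's worth of time. For example, at
 * HZ=1000 (TICK_NSEC around 1,000,000 ns), a task making many short
 * syscalls only hits the cpustat update path about once per accumulated
 * millisecond of system time. The same pattern applies to the guest
 * variant below and to vtime_user_exit().
 */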

static void vtime_account_guest(struct task_struct *tsk,
				struct vtime *vtime)
{
	vtime->gtime += get_vtime_delta(vtime);
	if (vtime->gtime >= TICK_NSEC) {
		account_guest_time(tsk, vtime->gtime);
		vtime->gtime = 0;
	}
}

static void __vtime_account_kernel(struct task_struct *tsk,
				   struct vtime *vtime)
{
	/* We might have scheduled out from guest path */
	if (vtime->state == VTIME_GUEST)
		vtime_account_guest(tsk, vtime);
	else
		vtime_account_system(tsk, vtime);
}

void vtime_account_kernel(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	if (!vtime_delta(vtime))
		return;

	write_seqcount_begin(&vtime->seqcount);
	__vtime_account_kernel(tsk, vtime);
	write_seqcount_end(&vtime->seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime_account_system(tsk, vtime);
	vtime->state = VTIME_USER;
	write_seqcount_end(&vtime->seqcount);
}

void vtime_user_exit(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime->utime += get_vtime_delta(vtime);
	if (vtime->utime >= TICK_NSEC) {
		account_user_time(tsk, vtime->utime);
		vtime->utime = 0;
	}
	vtime->state = VTIME_SYS;
	write_seqcount_end(&vtime->seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;
	/*
	 * The flags must be updated under the lock, together with
	 * the vtime_starttime flush and update. That enforces the
	 * right ordering and update-sequence synchronization against
	 * the reader (task_gtime()), which can thus safely catch up
	 * with a tickless delta.
	 */
	write_seqcount_begin(&vtime->seqcount);
	vtime_account_system(tsk, vtime);
	tsk->flags |= PF_VCPU;
	vtime->state = VTIME_GUEST;
	write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
	struct vtime *vtime = &tsk->vtime;

	write_seqcount_begin(&vtime->seqcount);
	vtime_account_guest(tsk, vtime);
	tsk->flags &= ~PF_VCPU;
	vtime->state = VTIME_SYS;
	write_seqcount_end(&vtime->seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	account_idle_time(get_vtime_delta(&tsk->vtime));
}

void vtime_task_switch_generic(struct task_struct *prev)
{
	struct vtime *vtime = &prev->vtime;

	write_seqcount_begin(&vtime->seqcount);
	if (vtime->state == VTIME_IDLE)
		vtime_account_idle(prev);
	else
		__vtime_account_kernel(prev, vtime);
	vtime->state = VTIME_INACTIVE;
	vtime->cpu = -1;
	write_seqcount_end(&vtime->seqcount);

	vtime = &current->vtime;

	write_seqcount_begin(&vtime->seqcount);
	if (is_idle_task(current))
		vtime->state = VTIME_IDLE;
	else if (current->flags & PF_VCPU)
		vtime->state = VTIME_GUEST;
	else
		vtime->state = VTIME_SYS;
	vtime->starttime = sched_clock();
	vtime->cpu = smp_processor_id();
	write_seqcount_end(&vtime->seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	struct vtime *vtime = &t->vtime;
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&vtime->seqcount);
	vtime->state = VTIME_IDLE;
	vtime->starttime = sched_clock();
	vtime->cpu = cpu;
	write_seqcount_end(&vtime->seqcount);
	local_irq_restore(flags);
}

u64 task_gtime(struct task_struct *t)
{
	struct vtime *vtime = &t->vtime;
	unsigned int seq;
	u64 gtime;

	if (!vtime_accounting_enabled())
		return t->gtime;

	do {
		seq = read_seqcount_begin(&vtime->seqcount);

		gtime = t->gtime;
		if (vtime->state == VTIME_GUEST)
			gtime += vtime->gtime + vtime_delta(vtime);

	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
{
	struct vtime *vtime = &t->vtime;
	unsigned int seq;
	u64 delta;

	if (!vtime_accounting_enabled()) {
		*utime = t->utime;
		*stime = t->stime;
		return;
	}

	do {
		seq = read_seqcount_begin(&vtime->seqcount);

		*utime = t->utime;
		*stime = t->stime;

		/* Task is sleeping or idle, nothing to add */
		if (vtime->state < VTIME_SYS)
			continue;

		delta = vtime_delta(vtime);

		/*
		 * Task runs either in user (including guest) or kernel space,
		 * add pending nohz time to the right place.
		 */
		if (vtime->state == VTIME_SYS)
			*stime += vtime->stime + delta;
		else
			*utime += vtime->utime + delta;
	} while (read_seqcount_retry(&vtime->seqcount, seq));
}

static int vtime_state_fetch(struct vtime *vtime, int cpu)
{
	int state = READ_ONCE(vtime->state);

	/*
	 * We raced against a context switch, fetch the
	 * kcpustat task again.
	 */
	if (vtime->cpu != cpu && vtime->cpu != -1)
		return -EAGAIN;

	/*
	 * Two possible things here:
	 * 1) We are seeing the scheduling out task (prev) or any past one.
	 * 2) We are seeing the scheduling in task (next) but it hasn't
	 *    passed through vtime_task_switch() yet, so the pending
	 *    cputime of the prev task may not be flushed yet.
	 *
	 * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
	 */
	if (state == VTIME_INACTIVE)
		return -EAGAIN;

	return state;
}

static u64 kcpustat_user_vtime(struct vtime *vtime)
{
	if (vtime->state == VTIME_USER)
		return vtime->utime + vtime_delta(vtime);
	else if (vtime->state == VTIME_GUEST)
		return vtime->gtime + vtime_delta(vtime);
	return 0;
}

static int kcpustat_field_vtime(u64 *cpustat,
				struct task_struct *tsk,
				enum cpu_usage_stat usage,
				int cpu, u64 *val)
{
	struct vtime *vtime = &tsk->vtime;
	unsigned int seq;

	do {
		int state;

		seq = read_seqcount_begin(&vtime->seqcount);

		state = vtime_state_fetch(vtime, cpu);
		if (state < 0)
			return state;

		*val = cpustat[usage];

		/*
		 * Nice vs. unnice cputime accounting may be inaccurate if
		 * the nice value has changed since the last vtime update.
		 * But a proper fix would involve interrupting the target on
		 * nice updates, which is a no-go on nohz_full (although the
		 * scheduler may still interrupt the target if rescheduling
		 * is needed...)
		 */
		switch (usage) {
		case CPUTIME_SYSTEM:
			if (state == VTIME_SYS)
				*val += vtime->stime + vtime_delta(vtime);
			break;
		case CPUTIME_USER:
			if (task_nice(tsk) <= 0)
				*val += kcpustat_user_vtime(vtime);
			break;
		case CPUTIME_NICE:
			if (task_nice(tsk) > 0)
				*val += kcpustat_user_vtime(vtime);
			break;
		case CPUTIME_GUEST:
			if (state == VTIME_GUEST && task_nice(tsk) <= 0)
				*val += vtime->gtime + vtime_delta(vtime);
			break;
		case CPUTIME_GUEST_NICE:
			if (state == VTIME_GUEST && task_nice(tsk) > 0)
				*val += vtime->gtime + vtime_delta(vtime);
			break;
		default:
			break;
		}
	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return 0;
}

u64 kcpustat_field(struct kernel_cpustat *kcpustat,
		   enum cpu_usage_stat usage, int cpu)
{
	u64 *cpustat = kcpustat->cpustat;
	u64 val = cpustat[usage];
	struct rq *rq;
	int err;

	if (!vtime_accounting_enabled_cpu(cpu))
		return val;

	rq = cpu_rq(cpu);

	for (;;) {
		struct task_struct *curr;

		rcu_read_lock();
		curr = rcu_dereference(rq->curr);
		if (WARN_ON_ONCE(!curr)) {
			rcu_read_unlock();
			return cpustat[usage];
		}

		err = kcpustat_field_vtime(cpustat, curr, usage, cpu, &val);
		rcu_read_unlock();

		if (!err)
			return val;

		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(kcpustat_field);
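
/*
 * A typical caller (sketch, assuming a /proc/stat-style reader) folds
 * the pending nohz delta of the task currently running on a nohz_full
 * CPU into a single cpustat field:
 *
 *	u64 user = kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_USER, cpu);
 *
 * The -EAGAIN/cpu_relax() loop above simply retries when the sampled
 * task is caught mid context-switch (VTIME_INACTIVE or a stale cpu).
 */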

static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
				    const struct kernel_cpustat *src,
				    struct task_struct *tsk, int cpu)
{
	struct vtime *vtime = &tsk->vtime;
	unsigned int seq;

	do {
		u64 *cpustat;
		u64 delta;
		int state;

		seq = read_seqcount_begin(&vtime->seqcount);

		state = vtime_state_fetch(vtime, cpu);
		if (state < 0)
			return state;

		*dst = *src;
		cpustat = dst->cpustat;

		/* Task is sleeping, dead or idle, nothing to add */
		if (state < VTIME_SYS)
			continue;

		delta = vtime_delta(vtime);

		/*
		 * Task runs either in user (including guest) or kernel space,
		 * add pending nohz time to the right place.
		 */
		if (state == VTIME_SYS) {
			cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
		} else if (state == VTIME_USER) {
			if (task_nice(tsk) > 0)
				cpustat[CPUTIME_NICE] += vtime->utime + delta;
			else
				cpustat[CPUTIME_USER] += vtime->utime + delta;
		} else {
			WARN_ON_ONCE(state != VTIME_GUEST);
			if (task_nice(tsk) > 0) {
				cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
				cpustat[CPUTIME_NICE] += vtime->gtime + delta;
			} else {
				cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
				cpustat[CPUTIME_USER] += vtime->gtime + delta;
			}
		}
	} while (read_seqcount_retry(&vtime->seqcount, seq));

	return 0;
}

void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
{
	const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
	struct rq *rq;
	int err;

	if (!vtime_accounting_enabled_cpu(cpu)) {
		*dst = *src;
		return;
	}

	rq = cpu_rq(cpu);

	for (;;) {
		struct task_struct *curr;

		rcu_read_lock();
		curr = rcu_dereference(rq->curr);
		if (WARN_ON_ONCE(!curr)) {
			rcu_read_unlock();
			*dst = *src;
			return;
		}

		err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
		rcu_read_unlock();

		if (!err)
			return;

		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);

#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */