// SPDX note: lines below are the file content; blame-table extraction residue removed.
1 | // SPDX-License-Identifier: GPL-2.0-or-later | |
2 | /* delayacct.c - per-task delay accounting | |
3 | * | |
4 | * Copyright (C) Shailabh Nagar, IBM Corp. 2006 | |
5 | */ | |
6 | ||
7 | #include <linux/sched.h> | |
8 | #include <linux/sched/task.h> | |
9 | #include <linux/sched/cputime.h> | |
10 | #include <linux/sched/clock.h> | |
11 | #include <linux/slab.h> | |
12 | #include <linux/taskstats.h> | |
13 | #include <linux/sysctl.h> | |
14 | #include <linux/delayacct.h> | |
15 | #include <linux/module.h> | |
16 | ||
17 | DEFINE_STATIC_KEY_FALSE(delayacct_key); | |
18 | int delayacct_on __read_mostly; /* Delay accounting turned on/off */ | |
19 | struct kmem_cache *delayacct_cache; | |
20 | ||
21 | static void set_delayacct(bool enabled) | |
22 | { | |
23 | if (enabled) { | |
24 | static_branch_enable(&delayacct_key); | |
25 | delayacct_on = 1; | |
26 | } else { | |
27 | delayacct_on = 0; | |
28 | static_branch_disable(&delayacct_key); | |
29 | } | |
30 | } | |
31 | ||
/*
 * Handle the "delayacct" kernel boot parameter: enable delay accounting
 * early.  The static key itself is flipped later in delayacct_init(),
 * which reads delayacct_on.
 */
static int __init delayacct_setup_enable(char *str)
{
	delayacct_on = 1;
	return 1;	/* 1 == "parameter handled", per __setup() convention */
}
__setup("delayacct", delayacct_setup_enable);
38 | ||
/*
 * Boot-time initialization.  Order matters: the slab cache must exist
 * before delayacct_tsk_init() can allocate init_task's delay structure,
 * and set_delayacct() arms the static key if "delayacct" was passed on
 * the command line (delayacct_on set by delayacct_setup_enable()).
 */
void delayacct_init(void)
{
	delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT);
	delayacct_tsk_init(&init_task);
	set_delayacct(delayacct_on);
}
45 | ||
46 | #ifdef CONFIG_PROC_SYSCTL | |
47 | int sysctl_delayacct(struct ctl_table *table, int write, void *buffer, | |
48 | size_t *lenp, loff_t *ppos) | |
49 | { | |
50 | int state = delayacct_on; | |
51 | struct ctl_table t; | |
52 | int err; | |
53 | ||
54 | if (write && !capable(CAP_SYS_ADMIN)) | |
55 | return -EPERM; | |
56 | ||
57 | t = *table; | |
58 | t.data = &state; | |
59 | err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); | |
60 | if (err < 0) | |
61 | return err; | |
62 | if (write) | |
63 | set_delayacct(state); | |
64 | return err; | |
65 | } | |
66 | #endif | |
67 | ||
68 | void __delayacct_tsk_init(struct task_struct *tsk) | |
69 | { | |
70 | tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); | |
71 | if (tsk->delays) | |
72 | raw_spin_lock_init(&tsk->delays->lock); | |
73 | } | |
74 | ||
75 | /* | |
76 | * Finish delay accounting for a statistic using its timestamps (@start), | |
77 | * accumalator (@total) and @count | |
78 | */ | |
79 | static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count) | |
80 | { | |
81 | s64 ns = local_clock() - *start; | |
82 | unsigned long flags; | |
83 | ||
84 | if (ns > 0) { | |
85 | raw_spin_lock_irqsave(lock, flags); | |
86 | *total += ns; | |
87 | (*count)++; | |
88 | raw_spin_unlock_irqrestore(lock, flags); | |
89 | } | |
90 | } | |
91 | ||
92 | void __delayacct_blkio_start(void) | |
93 | { | |
94 | current->delays->blkio_start = local_clock(); | |
95 | } | |
96 | ||
97 | /* | |
98 | * We cannot rely on the `current` macro, as we haven't yet switched back to | |
99 | * the process being woken. | |
100 | */ | |
101 | void __delayacct_blkio_end(struct task_struct *p) | |
102 | { | |
103 | struct task_delay_info *delays = p->delays; | |
104 | u64 *total; | |
105 | u32 *count; | |
106 | ||
107 | if (p->delays->flags & DELAYACCT_PF_SWAPIN) { | |
108 | total = &delays->swapin_delay; | |
109 | count = &delays->swapin_count; | |
110 | } else { | |
111 | total = &delays->blkio_delay; | |
112 | count = &delays->blkio_count; | |
113 | } | |
114 | ||
115 | delayacct_end(&delays->lock, &delays->blkio_start, total, count); | |
116 | } | |
117 | ||
/*
 * Accumulate @tsk's delay statistics into the taskstats buffer @d.
 * CPU run-time totals saturate to 0 on signed-overflow wraparound
 * (a zero total with a non-zero count signals overflow to userspace).
 * Always returns 0.
 */
int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
{
	u64 utime, stime, stimescaled, utimescaled;
	unsigned long long t2, t3;
	unsigned long flags, t1;
	s64 tmp;

	/* Real CPU time consumed (user + system). */
	task_cputime(tsk, &utime, &stime);
	tmp = (s64)d->cpu_run_real_total;
	tmp += utime + stime;
	/* If the signed sum wrapped below the old total, saturate to 0. */
	d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;

	/* Same accumulation for the frequency-scaled CPU time. */
	task_cputime_scaled(tsk, &utimescaled, &stimescaled);
	tmp = (s64)d->cpu_scaled_run_real_total;
	tmp += utimescaled + stimescaled;
	d->cpu_scaled_run_real_total =
		(tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp;

	/*
	 * No locking available for sched_info (and too expensive to add one)
	 * Mitigate by taking snapshot of values
	 */
	t1 = tsk->sched_info.pcount;
	t2 = tsk->sched_info.run_delay;
	t3 = tsk->se.sum_exec_runtime;

	d->cpu_count += t1;

	/* Runqueue wait time, with the same saturating-overflow treatment. */
	tmp = (s64)d->cpu_delay_total + t2;
	d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;

	tmp = (s64)d->cpu_run_virtual_total + t3;
	d->cpu_run_virtual_total =
		(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;

	/* Task may have no delay structure (allocation failed at fork). */
	if (!tsk->delays)
		return 0;

	/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */

	raw_spin_lock_irqsave(&tsk->delays->lock, flags);
	tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
	d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
	tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
	d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
	tmp = d->freepages_delay_total + tsk->delays->freepages_delay;
	d->freepages_delay_total = (tmp < d->freepages_delay_total) ? 0 : tmp;
	tmp = d->thrashing_delay_total + tsk->delays->thrashing_delay;
	d->thrashing_delay_total = (tmp < d->thrashing_delay_total) ? 0 : tmp;
	d->blkio_count += tsk->delays->blkio_count;
	d->swapin_count += tsk->delays->swapin_count;
	d->freepages_count += tsk->delays->freepages_count;
	d->thrashing_count += tsk->delays->thrashing_count;
	raw_spin_unlock_irqrestore(&tsk->delays->lock, flags);

	return 0;
}
175 | ||
176 | __u64 __delayacct_blkio_ticks(struct task_struct *tsk) | |
177 | { | |
178 | __u64 ret; | |
179 | unsigned long flags; | |
180 | ||
181 | raw_spin_lock_irqsave(&tsk->delays->lock, flags); | |
182 | ret = nsec_to_clock_t(tsk->delays->blkio_delay + | |
183 | tsk->delays->swapin_delay); | |
184 | raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); | |
185 | return ret; | |
186 | } | |
187 | ||
188 | void __delayacct_freepages_start(void) | |
189 | { | |
190 | current->delays->freepages_start = local_clock(); | |
191 | } | |
192 | ||
193 | void __delayacct_freepages_end(void) | |
194 | { | |
195 | delayacct_end(¤t->delays->lock, | |
196 | ¤t->delays->freepages_start, | |
197 | ¤t->delays->freepages_delay, | |
198 | ¤t->delays->freepages_count); | |
199 | } | |
200 | ||
201 | void __delayacct_thrashing_start(void) | |
202 | { | |
203 | current->delays->thrashing_start = local_clock(); | |
204 | } | |
205 | ||
206 | void __delayacct_thrashing_end(void) | |
207 | { | |
208 | delayacct_end(¤t->delays->lock, | |
209 | ¤t->delays->thrashing_start, | |
210 | ¤t->delays->thrashing_delay, | |
211 | ¤t->delays->thrashing_count); | |
212 | } |